This is an analysis performed on the bulk RNA-Seq dataset
The workflow is as follows:
1. Inspect variance in the dataset (Filtering, Normalization, PCA)
2. Perform differential expression analysis (Using limma)
3. Annotate biological signatures found by DEA (GSEA)
library(edgeR)
library(fgsea)
library(qusage)
library(DESeq2)
library(org.Hs.eg.db)
library(ggplot2)
library(enrichR)
library(VennDiagram)
library(ggpubr)
library(ggrepel)
library(PCAtools)
library(RColorBrewer)
library(ggiraph)
library(UpSetR)
library(rhandsontable)
library(nloptr)
library(ggVennDiagram)
library(BiocManager)
library(stringr)
library(knitr)
Counts list
dat = read.csv('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/data/merged_counts.apr2022.csv',row.names=1)
knitr::kable(dat[1:6, 1:12], caption ="A Knitr kable")
A Knitr kable
| ENSG00000000003 |
TSPAN6 |
131.00 |
46.00 |
65.00 |
83.00 |
59.00 |
79.00 |
98.00 |
56.00 |
57.0 |
220.00 |
74.00 |
| ENSG00000000005 |
TNMD |
1.00 |
1.00 |
0.00 |
0.00 |
2.00 |
2.00 |
1.00 |
0.00 |
0.0 |
5.00 |
1.00 |
| ENSG00000000419 |
DPM1 |
81.00 |
103.00 |
116.00 |
121.00 |
128.00 |
141.00 |
91.00 |
61.00 |
68.0 |
66.00 |
122.00 |
| ENSG00000000457 |
SCYL3 |
91.24 |
105.74 |
158.38 |
120.97 |
107.41 |
115.13 |
93.44 |
64.38 |
69.5 |
74.44 |
168.65 |
| ENSG00000000460 |
C1orf112 |
95.86 |
56.26 |
77.62 |
72.03 |
68.59 |
67.87 |
44.56 |
59.62 |
50.5 |
91.56 |
88.35 |
| ENSG00000000938 |
FGR |
28.00 |
11.00 |
18.00 |
15.00 |
16.00 |
6.00 |
58.00 |
44.00 |
28.0 |
56.00 |
16.00 |
Meta data
meta = read.table('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/data/meta_data.apr2022.csv',row.names=1,sep = ",")
names(meta)[names(meta) == "type"] <- "lesion_type"
meta$sex = 'M'
meta[which(meta$sample=="AB203"),]$sex = 'F'
meta.tmp <- meta[c(2,7,10,11,12,13)]
knitr::kable(meta.tmp[1:8,], caption ="A Knitr kable")
A Knitr kable
| S13 |
batch1 |
WML |
WM |
Lesion |
AB187 |
M |
| S14 |
batch1 |
NAGM |
Cortical |
Normal |
AB187 |
M |
| S23 |
batch1 |
CL |
Cortical |
Lesion |
AB203 |
F |
| S26 |
batch1 |
WML |
WM |
Lesion |
AB187 |
M |
| S29 |
batch1 |
CL |
Cortical |
Lesion |
AB187 |
M |
| S33 |
batch1 |
CL |
Cortical |
Lesion |
AB200 |
M |
| S37 |
batch1 |
WML |
WM |
Lesion |
AB200 |
M |
| S38 |
batch1 |
NAGM |
Cortical |
Normal |
AB200 |
M |
Define functions for the analysis: - find_enrichments(): For a given
comparison (DEA) extract enrichments for GO(BP), GO(MF), KEGG and
TFT
- fisher_enrichment(): Perform an enrichment of genesets using a fisher
test approach - plot.gene(): Plot gene expression for a given gene
across conditions.
The gmt files contain the genesets for GSEA
go = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c5.go.bp.v7.5.1.symbols.gmt')
mf = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c5.go.mf.v7.5.1.symbols.gmt')
kegg = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c2.cp.kegg.v7.5.1.symbols.gmt')
tft = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c3.tft.v7.5.1.symbols.gmt')
sign_cols=c(DN='deepskyblue',NS='grey80',UP='tomato')
plot.gene <- function(x){
gene_id = rownames(subset(resA,gene_name==x))[1]
meta$gene = y$E[gene_id,rownames(meta)]
ggplot(subset(meta,lesion_type!="Mixed"),aes(lesion_type,gene))+geom_boxplot()+geom_point(aes(col=sample))+theme_bw()+ ggtitle(x)
}
# plot.geneP("SOD2",cols=con_colorsP)
plot.geneP <- function(symbol,x='group',cols){
geneid = subset(convP,SYMBOL==symbol)$ENSEMBL
meta.tmp = metadataP
meta.tmp$expr = EP$E[geneid,rownames(meta.tmp)]
meta.tmp$group <- factor(meta.tmp$group , levels=c('CONTROL','NAWM','LESION'))
pl = ggplot(meta.tmp,aes(group,expr,fill=group))+geom_boxplot(width=.75,size=.75)+theme_bw()+scale_fill_manual(values=cols)+
labs(title = symbol, x = "Group", y = "Expr") + xlab("") + ylab("")
return(pl)
}
fisher_enrichment<-function(cluster_markers,pathway,universe,pathway_name){
enrichment_tables=list()
for(cluster in 1:length(cluster_markers)){
cluster_name=names(cluster_markers)[cluster]
output <- lapply(pathway, function(x) {
freq.table <- table(factor(universe %in% as.character(cluster_markers[[cluster]]),
levels = c(TRUE,FALSE)),
factor(universe %in% x,
levels = c(TRUE,FALSE)))
fit <- fisher.test(freq.table, alternative = "greater")
interSection <- intersect(cluster_markers[[cluster]], x)
interSection <- paste(interSection, collapse = ",")
return(value = c(NOM_pval = fit$p.value, INTERSECT = interSection,"OR"=fit$estimate))})
term_names=character()
for (pathway.term in 1:length(output)){
term_names[pathway.term]=pathway[[pathway.term]][1]
}
results=data.frame(do.call("rbind",output))
results$fdr=p.adjust(as.numeric(as.character(results$NOM_pval)),method = "BH")
results=results[order(results$fdr),]
enrichment_tables[[cluster_name]]=results
}
return(enrichment_tables)
}
find_enrichments<-function(resA,n=5,show='both'){
resA.t = resA$t
names(resA.t)=resA$gene_name
if(show=='up'){
resA.bp = subset(fgsea(go,resA.t),NES>0)
resA.mf = subset(fgsea(mf,resA.t),NES>0)
resA.kegg= subset(fgsea(kegg,resA.t),NES>0)
resA.tft= subset(fgsea(tft,resA.t),NES>0)
}else if(show=='down'){
resA.bp = subset(fgsea(go,resA.t),NES<0)
resA.mf = subset(fgsea(mf,resA.t),NES<0)
resA.kegg= subset(fgsea(kegg,resA.t),NES<0)
resA.tft= subset(fgsea(tft,resA.t),NES<0)
}else{
resA.bp = fgsea(go,resA.t)
resA.mf = fgsea(mf,resA.t)
resA.kegg= fgsea(kegg,resA.t)
resA.tft= subset(fgsea(tft,resA.t))
}
resA.bp$GO = 'BP'
resA.mf$GO = 'MF'
resA.kegg$GO = 'KEGG'
out=list(enr=list(bp=resA.bp,mf=resA.mf,kegg=resA.kegg,tft=resA.tft))
enr = rbind(
head(resA.bp[order(resA.bp$padj),],n),
head(resA.mf[order(resA.mf$padj),],n),
head(resA.kegg[order(resA.kegg$padj),],n)
)
enr=data.frame(enr)
enr$pathway=gsub('_',' ',gsub('GO_|KEGG_','',enr$pathway))
enr$pathway=factor(enr$pathway,levels=as.character(enr[order(enr$GO,enr$NES),'pathway']))
p=ggplot(enr,aes(pathway,NES,fill=GO))+geom_col(width=.6,alpha=.6)+geom_col(width=.6,fill=NA,aes(col=GO))+coord_flip()+theme_bw()+geom_vline(xintercept=c(n,n*2)+.5,linetype='dashed')+geom_hline(yintercept=0)
out$plot=p
return(out)
}
##top pathway
topPath<- function(fish ,specific, p ,n){
l=n*10
result <- matrix(NA, l, l)
colnames(result) <- rownames(result) <- rownames(getElement(fish, specific))[1:l]
for(i in 1:l){
for(j in 1:l){
minlist = 0
if(length(unlist(strsplit(fish[[specific]][["INTERSECT"]][i], split = ",")))>=length(unlist(strsplit(fish[[specific]][["INTERSECT"]][j], split = ",")))) minlist=length(unlist(strsplit(fish[[specific]][["INTERSECT"]][j], split = ",")))
else minlist=length(unlist(strsplit(fish[[specific]][["INTERSECT"]][i], split = ",")))
result[i, j] <- length(intersect(unlist(strsplit(fish[[specific]][["INTERSECT"]][i], split = ",")), unlist(strsplit(fish[[specific]][["INTERSECT"]][j], split = ","))))/(minlist)
result[j, i] <- result[i, j]
}
}
listBPspecific = list(c(rownames(result)[1]))
i=1
while(i < l){
j=i
while(j < l){
if(result[i, j] < p){
i=j
listBPspecific <- c(listBPspecific, rownames(result)[j])
j=l
}else {
j=j+1
}
}
if(length(listBPspecific)>n-1)
i=l
}
return(listBPspecific)
}
Differential expression analysis with limma-voom
Filtering to remove lowly expressed genes + Normalization for
composition bias
use the edgeR/limma workflow to filter out lowly expressed
genes. Voom is used to normalize the raw counts. A linear mixed
model is used to fit the expression values. Defining the model is done
with the model.matrix function of edgeR. In the model are included
factors of interest (group). If a batch/patient effect is detected, it
can be added to the model to account for its effect.
d0 <- DGEList(dat[-1])
d0 <- calcNormFactors(d0)
cutoff <- 2
drop <- which(apply(cpm(d0), 1, max) < cutoff)
d <- d0[-drop,]
mm <- model.matrix(~ 0 + lesion_type+batch+sample, data=meta[colnames(d),])
# Without filtering low expressed genes
y <- voom(d0[,rownames(meta)], mm, plot = T)

# Filtering low expressed genes
y <- voom(d[,rownames(meta)], mm, plot = T)

##gene name reference(ref_G)
conv_B=AnnotationDbi::select(org.Hs.eg.db,keys=rownames(y$E),keytype='ENSEMBL',columns=c('GENENAME','SYMBOL'))
'select()' returned 1:many mapping between keys and columns
ref_G=conv_B[!duplicated(conv_B[,1]),]
rownames(ref_G)=ref_G[,1]
##
PCA
Perform a PCA on the normalized samples and identify main
factors of variation.
Data including batch effect
BD <- pca(y$E, metadata = meta)
biplot(BD, colby = 'batch',hline = 0, vline = 0,legendPosition = 'right', lab="",title = "Data including batch effect + patient effect")

Only Batch effect removed from data (color by patient)
BDCorr <- removeBatchEffect(y$E,batch = meta$batch)
BD1 <- pca(BDCorr, metadata = meta)
biplot(BD1, colby = 'sample',shape = 'sex', hline = 0, vline = 0,legendPosition = 'right', lab="",title="Only Batch effect removed from data")

Only Batch effect removed from data (color by batch)
biplot(BD1B, colby = 'batch',hline = 0, vline = 0,legendPosition = 'right', lab="",title="Only Batch effect removed from data")
Patient+Batch effect removed from data (color by sample)
BDCorrBS <- removeBatchEffect(y$E,batch = meta$batch, batch2 = meta$sample)
BD1BS <- pca(BDCorrBS, metadata = meta)
biplot(BD1BS, colby = 'sample',hline = 0, vline = 0,legendPosition = 'right', lab="",title="Batch+Patient effect removed from data")
Patient+Batch effect removed from data (color by lesion_type and
sex)
biplot(BD1BS, colby = 'lesion_type',shape = 'sex',hline = 0, vline = 0,legendPosition = 'right', lab="",title="Batch+Patient effect removed from data")
Specify Contrast(s) of interest
Use a linear model to fit voomed counts (logCPM). Instanciate the
comparisons of interest by using the contrast approach from
limma. Extract the results, annotate the tables and merge them
into a global result dataframe.
fit <- lmFit(y, mm)
Warning message:
In diff.default(xscale) : reached elapsed time limit
contr <- makeContrasts(
A = lesion_typeWML - lesion_typeNAWM,
B = lesion_typeWML - lesion_typeCL,
C = lesion_typeCL - lesion_typeNAGM,
D = lesion_typeNAWM - lesion_typeNAGM,
E = lesion_typePVML - lesion_typeNAWM,
F = lesion_typePVML - lesion_typeWML,
G = lesion_typePVML - lesion_typeCL,
levels = colnames(coef(fit)))
tmp <- contrasts.fit(fit, contr)
tmp <- eBayes(tmp)
resA <- topTable(tmp, sort.by = "P", n = Inf,coef='A')
resB <- topTable(tmp, sort.by = "P", n = Inf,coef='B')
resC <- topTable(tmp, sort.by = "P", n = Inf,coef='C')
resD <- topTable(tmp, sort.by = "P", n = Inf,coef='D')
resE <- topTable(tmp, sort.by = "P", n = Inf,coef='E')
resF <- topTable(tmp, sort.by = "P", n = Inf,coef='F')
resG <- topTable(tmp, sort.by = "P", n = Inf,coef='G')
Comparisons between conditions (Volcano plot)
Plot results in the form of volcano plots. Thresholds used are
adj.P.Val<0.05 & |logFC|>1. Red genes are upregulated in the
first group as found in the subtitle of each graph.
PVML_vs_NAWM
resE$gene_id = rownames(resE)
resE$comp = 'PVML_vs_NAWM'
resE$gene_name = ref_G[rownames(resE),3]
resE=resE[order(-abs(resE$logFC)),]
resE$show = F
resE[1:15,'show']=T
resE$sign = 'NS'
resE[which(resE$adj.P.Val<0.05&resE$logFC>1),'sign']='UP'
resE[which(resE$adj.P.Val<0.05&resE$logFC<(-1)),'sign']='DN'
ggplot(resE,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resE,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('PVML_vs_NAWM')

WML_vs_NAWM
resA$gene_id = rownames(resA)
resA$comp = 'WML_vs_NAWM'
resA$gene_name = ref_G[rownames(resA),3]
resA=resA[order(-abs(resA$logFC)),]
resA$show = F
resA[1:15,'show']=T
resA$sign = 'NS'
resA[which(resA$adj.P.Val<0.05&resA$logFC>1),'sign']='UP'
resA[which(resA$adj.P.Val<0.05&resA$logFC<(-1)),'sign']='DN'
ggplot(resA,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resA,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('WML_vs_NAWM')

WML_vs_CL
resB$gene_id = rownames(resB)
resB$comp = 'WML_vs_CL'
resB$gene_name = ref_G[rownames(resB),3]
resB=resB[order(-abs(resB$logFC)),]
resB$show = F
resB[1:15,'show']=T
resB$sign = 'NS'
resB[which(resB$adj.P.Val<0.05&resB$logFC>1),'sign']='UP'
resB[which(resB$adj.P.Val<0.05&resB$logFC<(-1)),'sign']='DN'
ggplot(resB,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resB,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('WML_vs_CL')

CL_vs_NAGM
resC$gene_id = rownames(resC)
resC$comp = 'CL_vs_NAGM'
resC$gene_name = ref_G[rownames(resC),3]
resC=resC[order(-abs(resC$logFC)),]
resC$show = F
resC[1:15,'show']=T
resC$sign = 'NS'
resC[which(resC$adj.P.Val<0.05&resC$logFC>1),'sign']='UP'
resC[which(resC$adj.P.Val<0.05&resC$logFC<(-1)),'sign']='DN'
ggplot(resC,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resC,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('CL_vs_NAGM')

NAWM_vs_NAGM
resD$gene_id = rownames(resD)
resD$comp = 'NAWM_vs_NAGM'
resD$gene_name = ref_G[rownames(resD),3]
resD=resD[order(-abs(resD$logFC)),]
resD$show = F
resD[1:15,'show']=T
resD$sign = 'NS'
resD[which(resD$adj.P.Val<0.05&resD$logFC>1),'sign']='UP'
resD[which(resD$adj.P.Val<0.05&resD$logFC<(-1)),'sign']='DN'
ggplot(resD,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resD,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('NAWM_vs_NAGM')

PVML_vs_WML
resF$gene_id = rownames(resF)
resF$comp = 'PVML_vs_WML'
resF$gene_name = ref_G[rownames(resF),3]
resF=resF[order(-abs(resF$logFC)),]
resF$show = F
resF[1:15,'show']=T
resF$sign = 'NS'
resF[which(resF$adj.P.Val<0.05&resF$logFC>1),'sign']='UP'
resF[which(resF$adj.P.Val<0.05&resF$logFC<(-1)),'sign']='DN'
ggplot(resF,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resF,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('PVML_vs_WML')

PVML_vs_CL
resG$gene_id = rownames(resG)
resG$comp = 'PVML_vs_CL'
resG$gene_name = ref_G[rownames(resG),3]
resG=resG[order(-abs(resG$logFC)),]
resG$show = F
resG[1:15,'show']=T
resG$sign = 'NS'
resG[which(resG$adj.P.Val<0.05&resG$logFC>1),'sign']='UP'
resG[which(resG$adj.P.Val<0.05&resG$logFC<(-1)),'sign']='DN'
ggplot(resG,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resG,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('PVML_vs_CL')

Boxplots of select genes demonstrating significant differential gene
expression.
CIBAR2 <- plot.gene("CIBAR2")
DNAH5 <- plot.gene("DNAH5")
irf1 <- plot.gene("IRF1")
LRRIQ1 <- plot.gene("LRRIQ1")
icam1 <- plot.gene("ICAM1")
ODAD2 <- plot.gene("ODAD2")
vcam1 <- plot.gene("VCAM1")
CD24 <- plot.gene("CD24")
dlec1 <- plot.gene("DLEC1")
cfap54 <- plot.gene("CFAP54")
fhad1 <- plot.gene("FHAD1")
dynlt5 <- plot.gene("DYNLT5")
cfap43 <- plot.gene("CFAP43")
rsph1 <- plot.gene("RSPH1")
dnai3 <- plot.gene("DNAI3")
cxcl14 <- plot.gene("CXCL14")
ggarrange(CIBAR2, DNAH5, LRRIQ1, ODAD2, irf1, CD24, icam1, dlec1, cfap54, vcam1, fhad1, dynlt5, cfap43, rsph1, dnai3, cxcl14, ncol = 4, nrow = 4,align = c("hv"), common.legend = TRUE, legend="bottom")


Compare fold-changes across conditions.
df = data.frame(cbind(resE$logFC,resA[rownames(resE),'logFC'],resB[rownames(resE),'logFC'],resF[rownames(resE),'logFC']))
colnames(df)=c('WML_vs_NAWM','PVML_vs_NAWM','WML_vs_CL','PVML_vs_WML')
p1=ggplot(df,aes(WML_vs_NAWM,PVML_vs_NAWM))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p2=ggplot(df,aes(WML_vs_NAWM,WML_vs_CL))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p3=ggplot(df,aes(WML_vs_NAWM,PVML_vs_WML))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p4=ggplot(df,aes(PVML_vs_NAWM,WML_vs_CL))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p5=ggplot(df,aes(PVML_vs_NAWM,PVML_vs_WML))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p6=ggplot(df,aes(WML_vs_CL,PVML_vs_WML))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p1
`geom_smooth()` using formula 'y ~ x'

p2
`geom_smooth()` using formula 'y ~ x'

p3
`geom_smooth()` using formula 'y ~ x'

p4
`geom_smooth()` using formula 'y ~ x'

p5
`geom_smooth()` using formula 'y ~ x'

p6
`geom_smooth()` using formula 'y ~ x'

Gene ontology term enrichment analysis of comparisons
between conditions
This extracts biological signatures from the transcriptomic changes
found in the differential expression analysis
enr.a=find_enrichments(resA)
enr.b=find_enrichments(resB)
enr.c=find_enrichments(resC)
enr.d=find_enrichments(resD)
enr.e=find_enrichments(resE)
enr.f=find_enrichments(resF)
enr.g=find_enrichments(resG)
Biological Processes
enrA = enr.a$enr$bp
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)
enrB = enr.b$enr$bp
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)
enrC = enr.c$enr$bp
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)
enrD = enr.d$enr$bp
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)
enrE = enr.e$enr$bp
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)
enrF = enr.f$enr$bp
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)
enrG = enr.g$enr$bp
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)
paths = c(head(enrA,10)$pathway,
head(enrB,10)$pathway,
head(enrC,10)$pathway,
head(enrD,10)$pathway,
head(enrE,10)$pathway,
head(enrF,10)$pathway,
head(enrG,10)$pathway )
paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("GOBP", "",rb$pathway)
rb$pathway <- gsub("_", " ",rb$pathway)
ss = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))

Molecular Function
enrA = enr.a$enr$mf
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)
enrB = enr.b$enr$mf
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)
enrC = enr.c$enr$mf
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)
enrD = enr.d$enr$mf
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)
enrE = enr.e$enr$mf
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)
enrF = enr.f$enr$mf
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)
enrG = enr.g$enr$mf
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)
paths = c(head(enrA,10)$pathway,
head(enrB,10)$pathway,
head(enrC,10)$pathway,
head(enrD,10)$pathway,
head(enrE,10)$pathway,
head(enrF,10)$pathway,
head(enrG,10)$pathway )
paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')],
enrB[which(enrB$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')],
enrC[which(enrC$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')],
enrD[which(enrD$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')],
enrE[which(enrE$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')],
enrF[ which(enrF$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')],
enrG[which(enrG$pathway%in%paths),c('padj','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("GOMF", "",rb$pathway)
rb$pathway <- gsub("_", " ",rb$pathway)
ss = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
ggplot(na.omit(rb),aes(pathway,comp,fill=NES,size=n_genes))+geom_point(shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text(angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))

Transcription factor targets
enrA = enr.a$enr$tft
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)
enrB = enr.b$enr$tft
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)
enrC = enr.c$enr$tft
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)
enrD = enr.d$enr$tft
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)
enrE = enr.e$enr$tft
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)
enrF = enr.f$enr$tft
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)
enrG = enr.g$enr$tft
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)
paths = c(head(enrA,10)$pathway,
head(enrB,10)$pathway,
head(enrC,10)$pathway,
head(enrD,10)$pathway,
head(enrE,10)$pathway,
head(enrF,10)$pathway,
head(enrG,10)$pathway )
paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("_", " ",rb$pathway)
ss = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))

KEGG pathway database
enrA = enr.a$enr$kegg
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)
enrB = enr.b$enr$kegg
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)
enrC = enr.c$enr$kegg
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)
enrD = enr.d$enr$kegg
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)
enrE = enr.e$enr$kegg
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)
enrF = enr.f$enr$kegg
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)
enrG = enr.g$enr$kegg
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)
paths = c(head(enrA,10)$pathway,
head(enrB,10)$pathway,
head(enrC,10)$pathway,
head(enrD,10)$pathway,
head(enrE,10)$pathway,
head(enrF,10)$pathway,
head(enrG,10)$pathway )
paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("KEGG", "",rb$pathway)
rb$pathway <- gsub("_", " ",rb$pathway)
ss = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))

Plot the overlap between DEGs with an upset plot/Venn Diagram
Overlap of DEGs in comparaisons
res=rbind(resA,resB,resC,resD,resE,resF,resG)
res= na.omit(res)
degsListvs = list(
WML_vs_NAWM=subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
PVML_vs_WML=subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
PVML_vs_NAWM=subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
WML_vs_CL=subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
NAWM_vs_NAGM=subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
degsListUP = list(
WML_vs_NAWM=subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
PVML_vs_WML=subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
PVML_vs_NAWM=subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
WML_vs_CL=subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
NAWM_vs_NAGM=subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & (logFC)>1)$gene_name)
degsListDOWN = list(
WML_vs_NAWM=subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
PVML_vs_WML=subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
PVML_vs_NAWM=subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
WML_vs_CL=subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
NAWM_vs_NAGM=subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & (logFC)<1)$gene_name)
UpSet plot
upset(fromList(degsListvs),sets = c('WML_vs_NAWM','PVML_vs_WML','PVML_vs_NAWM',"WML_vs_CL","NAWM_vs_NAGM"),order.by='freq',decreasing = c(F),text.scale = c(1.3, 2, 1.5, 1.5, 2, 2))


#upset(fromList(degsListUP),sets = c('WML_vs_NAWM','PVML_vs_WML','PVML_vs_NAWM',"WML_vs_CL","NAWM_vs_NAGM"),order.by='freq',decreasing = c(F),text.scale = c(1.3, 2, 1.5, 1.5, 2, 2))
#upset(fromList(degsListDOWN),sets = c('WML_vs_NAWM','PVML_vs_WML','PVML_vs_NAWM',"WML_vs_CL","NAWM_vs_NAGM"),order.by='freq',decreasing = c(F),text.scale = c(1.3, 2, 1.5, 1.5, 2, 2))
Venn Diagram
#label = "count"
ggVennDiagram(degsListvs,label = "percent", label_alpha = 0)+ scale_fill_distiller(palette = "Reds", direction = 1)

Gene Ontology enrichment analysis of overlapping
genes in comparisons between conditions(The top 5 specific Genesets from
the upsets plot)
WML_vs_NAWM=na.omit(subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
PVML_vs_WML=na.omit(subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
PVML_vs_NAWM=na.omit(subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
CL_vs_NAWM=na.omit(subset(res,comp=='CL_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
WML_vs_CL=na.omit(subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
NAWM_vs_NAGM=na.omit(subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
specificWML_vs_CL = as.character(WML_vs_CL[!WML_vs_CL%in%WML_vs_NAWM & !WML_vs_CL%in%PVML_vs_WML & !WML_vs_CL%in%PVML_vs_NAWM & !WML_vs_CL%in%CL_vs_NAWM & !WML_vs_CL%in%NAWM_vs_NAGM])
specificPVML_vs_NAWM = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & !PVML_vs_NAWM%in%PVML_vs_WML & !PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])
commonPVML_vs_NAWM.PVML_vs_WML = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & PVML_vs_NAWM%in%PVML_vs_WML & !PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])
commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & PVML_vs_NAWM%in%PVML_vs_WML & PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])
commonPVML_vs_NAWM.WML_vs_CL = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & !PVML_vs_NAWM%in%PVML_vs_WML & PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])
list = list(commonPVML_vs_NAWM.PVML_vs_WML=commonPVML_vs_NAWM.PVML_vs_WML, specificWML_vs_CL=specificWML_vs_CL,specificPVML_vs_NAWM=specificPVML_vs_NAWM, commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, commonPVML_vs_NAWM.WML_vs_CL=commonPVML_vs_NAWM.WML_vs_CL )
fishBP = fisher_enrichment(list,go,unique(ref_G$SYMBOL))
fishMF = fisher_enrichment(list,mf,unique(ref_G$SYMBOL))
fishTFT = fisher_enrichment(list,tft,unique(ref_G$SYMBOL))
fishKEGG = fisher_enrichment(list,kegg,unique(ref_G$SYMBOL))
biological processes
enr.specificWML_vs_CL = fishBP$specificWML_vs_CL
enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr)
enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio)
enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
topspecificWML_vs_CL <- topPath(fishBP ,"specificWML_vs_CL", 0.5, 20)
enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
rownames(enr.specificWML_vs_CL) <- gsub("GOBP_", "",rownames(enr.specificWML_vs_CL))
enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
enr.specificPVML_vs_NAWM = fishBP$specificPVML_vs_NAWM
enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr)
enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio)
enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
topspecificPVML_vs_NAWM <- topPath(fishBP ,"specificPVML_vs_NAWM", 0.5, 20)
enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
rownames(enr.specificPVML_vs_NAWM) <- gsub("GOBP_", "",rownames(enr.specificPVML_vs_NAWM))
enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
enr.commonPVML_vs_NAWM.PVML_vs_WML = fishBP$commonPVML_vs_NAWM.PVML_vs_WML
enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML <- topPath(fishBP ,"commonPVML_vs_NAWM.PVML_vs_WML", 0.5, 20)
enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML) <- gsub("GOBP_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML))
enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishBP$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishBP ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL) <- gsub("GOBP_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL))
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"
enr.commonPVML_vs_NAWM.WML_vs_CL = fishBP$commonPVML_vs_NAWM.WML_vs_CL
enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.WML_vs_CL <- topPath(fishBP ,"commonPVML_vs_NAWM.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
rownames(enr.commonPVML_vs_NAWM.WML_vs_CL) <- gsub("GOBP_", "",rownames(enr.commonPVML_vs_NAWM.WML_vs_CL))
enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
Specific WML_vs_CL
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))

Specific PVML_vs_NAWM
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))

Common PVML_vs_NAWM & PVML_vs_WML
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))

Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))

Common PVML_vs_NAWM & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))

Molecular Function
enr.specificWML_vs_CL = fishMF$specificWML_vs_CL
enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr)
enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio)
enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
topspecificWML_vs_CL <- topPath(fishMF ,"specificWML_vs_CL", 0.5, 20)
enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
rownames(enr.specificWML_vs_CL) <- gsub("GOMF_", "",rownames(enr.specificWML_vs_CL))
enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
enr.specificPVML_vs_NAWM = fishMF$specificPVML_vs_NAWM
enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr)
enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio)
enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
topspecificPVML_vs_NAWM <- topPath(fishMF ,"specificPVML_vs_NAWM", 0.5, 20)
enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
rownames(enr.specificPVML_vs_NAWM) <- gsub("GOMF_", "",rownames(enr.specificPVML_vs_NAWM))
enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
enr.commonPVML_vs_NAWM.PVML_vs_WML = fishMF$commonPVML_vs_NAWM.PVML_vs_WML
enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML <- topPath(fishMF ,"commonPVML_vs_NAWM.PVML_vs_WML", 0.5, 20)
enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML) <- gsub("GOMF_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML))
enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishMF$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishMF ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL) <- gsub("GOMF_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL))
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"
enr.commonPVML_vs_NAWM.WML_vs_CL = fishMF$commonPVML_vs_NAWM.WML_vs_CL
enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.WML_vs_CL <- topPath(fishMF ,"commonPVML_vs_NAWM.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
rownames(enr.commonPVML_vs_NAWM.WML_vs_CL) <- gsub("GOMF_", "",rownames(enr.commonPVML_vs_NAWM.WML_vs_CL))
enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
Specific WML_vs_CL
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Specific PVML_vs_NAWM
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Common PVML_vs_NAWM & PVML_vs_WML
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Common PVML_vs_NAWM & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Transcription factor targets
enr.specificWML_vs_CL = fishTFT$specificWML_vs_CL
enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr)
enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio)
enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
topspecificWML_vs_CL <- topPath(fishTFT ,"specificWML_vs_CL", 0.5, 20)
enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
enr.specificPVML_vs_NAWM = fishTFT$specificPVML_vs_NAWM
enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr)
enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio)
enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
topspecificPVML_vs_NAWM <- topPath(fishTFT ,"specificPVML_vs_NAWM", 0.5, 20)
enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
enr.commonPVML_vs_NAWM.PVML_vs_WML = fishTFT$commonPVML_vs_NAWM.PVML_vs_WML
enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML <- topPath(fishTFT ,"commonPVML_vs_NAWM.PVML_vs_WML", 0.5, 20)
enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishTFT$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishTFT ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"
enr.commonPVML_vs_NAWM.WML_vs_CL = fishTFT$commonPVML_vs_NAWM.WML_vs_CL
enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.WML_vs_CL <- topPath(fishTFT ,"commonPVML_vs_NAWM.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
Specific WML_vs_CL
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
Specific PVML_vs_NAWM
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()
Common PVML_vs_NAWM & PVML_vs_WML
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()
Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
Common PVML_vs_NAWM & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
KEGG pathway database
enr.specificWML_vs_CL = fishKEGG$specificWML_vs_CL
enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr)
enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio)
enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
topspecificWML_vs_CL <- topPath(fishKEGG ,"specificWML_vs_CL", 0.5, 20)
enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
rownames(enr.specificWML_vs_CL) <- gsub("KEGG", "",rownames(enr.specificWML_vs_CL))
enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
enr.specificPVML_vs_NAWM = fishKEGG$specificPVML_vs_NAWM
enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr)
enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio)
enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
topspecificPVML_vs_NAWM <- topPath(fishKEGG ,"specificPVML_vs_NAWM", 0.5, 20)
enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
rownames(enr.specificPVML_vs_NAWM) <- gsub("KEGG", "",rownames(enr.specificPVML_vs_NAWM))
enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
enr.commonPVML_vs_NAWM.PVML_vs_WML = fishKEGG$commonPVML_vs_NAWM.PVML_vs_WML
enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
topcommonPVML_vs_NAWM.PVML_vs_WML = subset(enr.commonPVML_vs_NAWM.PVML_vs_WML,NOM_pval<=0.1)
topcommonPVML_vs_NAWM.PVML_vs_WML = topcommonPVML_vs_NAWM.PVML_vs_WML$pathway
enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML) <- gsub("KEGG", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML))
enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishKEGG$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishKEGG ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL) <- gsub("KEGG", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL))
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"
enr.commonPVML_vs_NAWM.WML_vs_CL = fishKEGG$commonPVML_vs_NAWM.WML_vs_CL
enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr)
enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio)
enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
topcommonPVML_vs_NAWM.WML_vs_CL = subset(enr.commonPVML_vs_NAWM.WML_vs_CL,NOM_pval<=0.1)
topcommonPVML_vs_NAWM.WML_vs_CL = topcommonPVML_vs_NAWM.WML_vs_CL$pathway
enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
rownames(enr.commonPVML_vs_NAWM.WML_vs_CL) <- gsub("KEGG", "",rownames(enr.commonPVML_vs_NAWM.WML_vs_CL))
enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
Specific WML_vs_CL
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
Specific PVML_vs_NAWM
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()
Common PVML_vs_NAWM & PVML_vs_WML
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()
Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
common PVML_vs_NAWM & WML_vs_CL
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
This is an analysis performed on the bulk RNA-Seq dataset from the
paper titled:
Brain macrophages acquire distinct transcriptomes in multiple
sclerosis lesions and normal appearing white matter.
The authors of this paper have sequenced 68 samples of human brain
material. Here is the paper
Abstract
Multiple sclerosis (MS) is a disease of the central nervous system
(CNS) that is characterized by inflammation and focal areas of
demyelination, ultimately resulting in axonal degradation and neuronal
loss. Several lines of evidence point towards a role for microglia and
other brain macrophages in disease initiation and progression, but
exactly how lesion formation is triggered is currently unknown. Here, we
characterized early changes in MS brain tissue through transcriptomic
analysis of normal appearing white matter (NAWM). We found that NAWM was
characterized by enriched expression of genes associated with
inflammation and cellular stress derived from brain macrophages. Single
cell RNA sequencing confirmed an early stress response in brain
macrophages in NAWM and identified specific macrophage subsets that
associate with different stages of demyelinating lesions. These early
changes associated with lesion development in MS brain tissue may
provide therapeutic targets to limit lesion progression and
demyelination.
goBP = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c5.go.bp.v7.5.1.symbols.gmt')
metaP <- read.table("/Users/macbookpro/OneDrive/Documents/UDEM/Master/GSE179427/SraRunTable.txt", header = TRUE, sep = ",")
metaP <- data.frame(sample = metaP$Sample.Name, group=metaP$Group)
metaP <- metaP[!duplicated(metaP[ , c("sample", "group")]), ]
metaP$group <- toupper(metaP$group)
refP <- read.csv("/Users/macbookpro/OneDrive/Documents/UDEM/Master/GSE179427/ref.csv", header = TRUE, sep = ";")
refP <- data.frame(sample = refP$sample, sampleName = refP$sampleName)
refP <- merge(metaP,refP,by="sample")
countsP <- read.table("/Users/macbookpro/OneDrive/Documents/UDEM/Master/GSE179427/GSE179427_countmtx.csv", header = TRUE, sep = ";",row.names=1)
countsP[is.na(countsP)] <- 0
refPP <- data.frame(sampleName = colnames(countsP))
refPP <- merge(refPP, refP, by="sampleName")
refPP$group <- gsub('NAGM','NAWM',refPP$group)
con_colorsP = c('olivedrab2','deepskyblue','darkorange')
names(con_colorsP)=c('CONTROL','NAWM','LESION')
##remove mitochondrial gene
convP=select(org.Hs.eg.db,keys=rownames(EP$E),keytype='ENSEMBL',columns=c('GENENAME','SYMBOL'))
refGP=convP[!duplicated(convP[,1]),]
rownames(refGP)=refGP[,1]
c=subset(convP,grepl('mitochon',convP[,2]))
cc = subset(c,grepl('^MT', c[,3]))
NGMito = countsP[!rownames(countsP) %in% cc$ENSEMBL,]
## limma DEA
d0P <- DGEList(NGMito)
#d0P <- DGEList(countsP) with mitochondrial gene
d0P <- calcNormFactors(d0P)
cutoff <- 5
dropP <- which(apply(cpm(d0P), 1, max) < cutoff)
dP <- d0P[-dropP,]
metadataP <- data.frame(sampleName=rownames(dP$samples))
metadataP$group <- NA
for (i in 1:nrow(metadataP)) {
for (j in 1:nrow(refPP)) {
if(metadataP$sampleName[i]==refPP$sampleName[j]){
metadataP$group[i] = refPP$group[j]
break
}
}
}
rownames(metadataP) <- metadataP$sampleName
metadataP <- subset(metadataP, select=-c(sampleName))
mmP <- model.matrix(~0 + group:group,data=metadataP)
EP <- voom(dP, mmP, plot = F)
Principal component analysis of 68 white matter brain tissue
samples.
26 Control
31 NAWM
11 Lesion
p <- pca(EP$E, metadata = metadataP)
plot1 <- biplot(p, colby = 'group',hline = 0, vline = 0,legendPosition = 'right', lab="")
plot2<- plot1 + scale_color_manual( name= "Groups",labels = c("CONTROL","LESION","NAWM"), values= c('olivedrab2','darkorange4','goldenrod2'))
plot2
Comparisons between conditions
fitP <- lmFit(EP, mmP)
contrP <- makeContrasts(
A= groupNAWM - groupCONTROL,
B= groupLESION - groupNAWM,
C= groupLESION - groupCONTROL, levels = colnames(coef(fitP)))
tmpP <- contrasts.fit(fitP,contrP)
tmpP <- eBayes(tmpP)
res.AP <- topTable(tmpP, sort.by = "P", n = Inf, coef='A',adjust="fdr")
res.BP <- topTable(tmpP, sort.by = "P", n = Inf, coef='B',adjust="fdr")
res.CP <- topTable(tmpP, sort.by = "P", n = Inf, coef='C',adjust="fdr")
res.AP$condition = 'A: NAWM vs CWM'
res.BP$condition = 'B: WML vs NAWM'
res.CP$condition = 'C: WML vs CWM'
res.AP$geneid = rownames(res.AP)
res.BP$geneid = rownames(res.BP)
res.CP$geneid = rownames(res.CP)
res.T = rbind(res.AP,res.BP,res.CP)
res.T$symbol = refGP[res.T$geneid,3]
res.AP$symbol = refGP[res.AP$geneid,3]
res.BP$symbol = refGP[res.BP$geneid,3]
res.CP$symbol = refGP[res.CP$geneid,3]
degs.AP = subset(res.T, condition == 'A: NAWM vs CWM' & adj.P.Val<0.05 & abs(logFC)>1)$symbol
degs.BP = subset(res.T, condition == 'B: WML vs NAWM' & adj.P.Val<0.05 & abs(logFC)>1)$symbol
degs.CP = subset(res.T, condition == 'C: WML vs CWM' & adj.P.Val<0.05 & abs(logFC)>1)$symbol
res.T$sign = 'NS'
res.T[which(res.T$adj.P.Val<0.05 & res.T$logFC>1),'sign']='UP'
res.T[which(res.T$adj.P.Val<0.05 & res.T$logFC<(-1)),'sign']='DN'
res.BP$sign = 'NS'
res.BP[which(res.BP$adj.P.Val<0.05 & res.BP$logFC>1),'sign']='UP'
res.BP[which(res.BP$adj.P.Val<0.05 & res.BP$logFC<(-1)),'sign']='DN'
###
res.BP<-na.omit(res.BP[order(-res.BP$logFC),])
ggplot(res.T,aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+theme_bw()+scale_color_manual(values=sign_cols)+geom_point(data=res.BP[1:10,], col="black") +geom_label_repel(data=res.BP[1:30,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) +theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))
###
res.AP<-na.omit(res.AP[order(res.AP$P.Value),])
res.BP<-na.omit(res.BP[order(res.BP$P.Value),])
res.CP<-na.omit(res.CP[order(res.CP$P.Value),])
Volcano plot with the top 10 significant genes
options(ggrepel.max.overlaps = Inf)
volcanoTop10 <- ggplot(res.T,aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+theme_bw()+scale_color_manual(values=c('deepskyblue','grey75','tomato'))+
geom_point(data=res.AP[1:10,], col="black") +geom_label_repel(data=res.AP[1:10,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) +
geom_point(data=res.BP[1:10,], col="black") +geom_label_repel(data=res.BP[1:10,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) +
geom_point(data=res.CP[1:10,], col="black") +geom_label_repel(data=res.CP[1:10,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) +
theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+ facet_wrap(~condition)
volcanoTop10
Volcano plot with genes of interest (top significant genes from
Bulk);
ICAM1,VCAM1,“DNAH5”,“CIBAR2”,“LRRIQ1”,“ODAD2”,“CD24”,“DLEC1”,“CFAP54”,“FHAD1”,“DYNLT5”,“CFAP43”,“RSPH1”,“DNAI3”
res=res[order(res$P.Value),]
genelabelsP <- c("CD24")
geneLabelsP2 <- unique(head(resA$gene_name,30))
genelabelsP <- append(genelabelsP, geneLabelsP2)
#### put gene of the public data on our comparaison
#geneLabelsPub <- unique(head(res.BP$symbol,30))
#geneLabelsPubSignificant <- subset(res.BP,sign=="UP")
#geneLabelsPubSignificant <- unique(geneLabelsPubSignificant$symbol)
#ggplot(resA,aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=sign_cols)+ geom_point(data=resA[match(geneLabelsPub, resA$gene_name),], col="black") +geom_label_repel(data=resA[match(geneLabelsPub$symbol, resA$gene_name),],aes(label=geneLabelsPub),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("WML vs NAWM")
###
p1 = ggplot(subset(res.T,condition=="A: NAWM vs CWM"),aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=c('deepskyblue','grey75','tomato'))+ geom_point(data=res.AP[match(genelabelsP, res.AP$symbol),], col="black") +geom_label_repel(data=res.AP[match(genelabelsP, res.AP$symbol),],aes(label=genelabelsP),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("A: NAWM vs CWM")
p2 = ggplot(subset(res.T,condition=='B: WML vs NAWM'),aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=c('deepskyblue','grey75','tomato'))+ geom_point(data=res.BP[match(genelabelsP, res.BP$symbol),], col="black") +geom_label_repel(data=res.BP[match(genelabelsP, res.BP$symbol),],aes(label=genelabelsP),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("B: WML vs NAWM")
p3 = ggplot(subset(res.T,condition=='C: WML vs CWM'),aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=c('deepskyblue','grey75','tomato'))+ geom_point(data=res.CP[match(genelabelsP, res.CP$symbol),], col="black") +geom_label_repel(data=res.CP[match(genelabelsP, res.CP$symbol),],aes(label=genelabelsP),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("C: WML vs CWM")
ggarrange(
p1,p2,p3, common.legend = TRUE, legend = "bottom", ncol = 3)
Boxplots of select genes demonstrating significant differential gene
expression.
#CIBAR2 <- plot.geneP("CIBAR2",cols=con_colorsP)
DNAH5 <- plot.geneP("DNAH5",cols=con_colorsP)
#irf1 <- plot.geneP("IRF1",cols=con_colorsP)
LRRIQ1 <- plot.geneP("LRRIQ1",cols=con_colorsP)
icam1 <- plot.geneP("ICAM1",cols=con_colorsP)
ODAD2 <- plot.geneP("ODAD2",cols=con_colorsP)
vcam1 <- plot.geneP("VCAM1",cols=con_colorsP)
#CD24 <- plot.geneP("CD24",cols=con_colorsP)
dlec1 <- plot.geneP("DLEC1",cols=con_colorsP)
cfap54 <- plot.geneP("CFAP54",cols=con_colorsP)
fhad1 <- plot.geneP("FHAD1",cols=con_colorsP)
dynlt5 <- plot.geneP("DYNLT5",cols=con_colorsP)
cfap43 <- plot.geneP("CFAP43",cols=con_colorsP)
rsph1 <- plot.geneP("RSPH1",cols=con_colorsP)
dnai3 <- plot.geneP("DNAI3",cols=con_colorsP)
#cxcl14 <- plot.geneP("CXCL14",cols=con_colorsP)
ggarrange(DNAH5, LRRIQ1, ODAD2, icam1, dlec1, cfap54, vcam1, fhad1, dynlt5, cfap43, rsph1, dnai3, ncol = 3, nrow = 4,align = c("hv"), common.legend = TRUE, legend="bottom")
Gene Ontology enrichment analysis (Top pathway)
Biological Process
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))
listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,goBP,unique(res.T$symbol))
##top10 pathway BP
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr)
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio)
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
topAP <- topPath(fishP ,"specificAP", 0.5, 14)
tabAP1 <- tabAP[match(topAP, rownames(tabAP)),]
rownames(tabAP1) <- gsub("GOBP_", "",rownames(tabAP1))
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1))
rownames(tabAP) <- gsub("GOBP_", "",rownames(tabAP))
rownames(tabAP) <- gsub("_", " ",rownames(tabAP))
tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio)
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
topBP <- topPath(fishP ,"specificBP", 0.5, 14)
tabBP1 <- tabBP[match(topBP, rownames(tabBP)),]
rownames(tabBP1) <- gsub("GOBP_", "",rownames(tabBP1))
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1))
rownames(tabBP) <- gsub("GOBP_", "",rownames(tabBP))
rownames(tabBP) <- gsub("_", " ",rownames(tabBP))
tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio)
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 14)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("GOBP_", "",rownames(tabCP1))
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1))
rownames(tabCP) <- gsub("GOBP_", "",rownames(tabCP))
rownames(tabCP) <- gsub("_", " ",rownames(tabCP))
Top 10 pathway CWM vs NAWM
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO Biological Process 2021") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Top 10 pathway NAWM vs WML
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO Biological Process 2021") + coord_flip()+ theme_bw()+ theme(axis.text = element_text(face="bold"))
Top 10 pathway CWM vs WML
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO Molecular Function 2021") + coord_flip()+theme_bw()+ theme(axis.text = element_text(face="bold"))
Molecular Function
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))
listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,mf,unique(res.T$symbol))
##top10 pathway MF
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr)
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio)
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
topAP <- topPath(fishP ,"specificAP", 0.5, 12)
tabAP1 <- tabAP[match(topAP, rownames(tabAP)),]
rownames(tabAP1) <- gsub("GOMF_", "",rownames(tabAP1))
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1))
rownames(tabAP) <- gsub("GOMF_", "",rownames(tabAP))
rownames(tabAP) <- gsub("_", " ",rownames(tabAP))
tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio)
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
tabBP=tabBP[order(tabBP$NOM_pval),]
topBP = subset(tabBP,NOM_pval<=0.05)
topBP$pathway = rownames(topBP)
tabBP1 <- tabBP[match(rownames(topBP), rownames(tabBP)),]
tabBP1 <- tabBP1[-c(8, 9, 14, 15), ]
tabBP1 <- tabBP1[1:12,]
rownames(tabBP1) <- gsub("GOMF_", "",rownames(tabBP1))
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1))
rownames(tabBP) <- gsub("GOMF_", "",rownames(tabBP))
rownames(tabBP) <- gsub("_", " ",rownames(tabBP))
tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio)
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 12)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("GOMF_", "",rownames(tabCP1))
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1))
rownames(tabCP) <- gsub("GOMF_", "",rownames(tabCP))
rownames(tabCP) <- gsub("_", " ",rownames(tabCP))
Top 10 pathway CWM vs NAWM
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO Molecular Function 2021") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
Top 10 pathway NAWM vs WML
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO Molecular Function 2021") + coord_flip()+ theme_bw()+ theme(axis.text = element_text(face="bold"))
Top 10 pathway CWM vs WML
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO Molecular Function 2021") + coord_flip()+theme_bw()+ theme(axis.text = element_text(face="bold"))
Transcription factor targets
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))
listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,tft,unique(res.T$symbol))
##top10 pathway TFT
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr)
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio)
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
tabAP=tabAP[order(tabAP$NOM_pval),]
topAP = subset(tabAP,NOM_pval<=0.05)
topAP$pathway = rownames(topAP)
tabAP1 <- tabAP[match(rownames(topAP), rownames(tabAP)),]
tabAP1 <- tabAP1[1:12,]
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1))
rownames(tabAP) <- gsub("_", " ",rownames(tabAP))
tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio)
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
tabBP=tabBP[order(tabBP$NOM_pval),]
topBP = subset(tabBP,NOM_pval<=0.05)
topBP$pathway = rownames(topBP)
tabBP1 <- tabBP[match(rownames(topBP), rownames(tabBP)),]
tabBP1 <- tabBP1[1:12,]
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1))
rownames(tabBP) <- gsub("_", " ",rownames(tabBP))
tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio)
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 12)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1))
rownames(tabCP) <- gsub("_", " ",rownames(tabCP))
Top 10 pathway CWM vs NAWM
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO Transcription factor targets 2021") + xlab("") + coord_flip() + theme_bw()
Top 10 pathway NAWM vs WML
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO Transcription factor targets 2021") + coord_flip()+ theme_bw()
Top 10 pathway CWM vs WML
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO Transcription factor targets 2021") + coord_flip()+theme_bw()
KEGG pathway database
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))
listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,kegg,unique(res.T$symbol))
##top10 pathway KEGG
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr)
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio)
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
tabAP=tabAP[order(tabAP$NOM_pval),]
topAP = subset(tabAP,NOM_pval<=0.17)
topAP$pathway = rownames(topAP)
tabAP1 <- tabAP[match(rownames(topAP), rownames(tabAP)),]
rownames(tabAP1) <- gsub("KEGG", "",rownames(tabAP1))
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1))
rownames(tabAP) <- gsub("KEGG", "",rownames(tabAP))
rownames(tabAP) <- gsub("_", " ",rownames(tabAP))
tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio)
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
tabBP=tabBP[order(tabBP$NOM_pval),]
topBP = subset(tabBP,NOM_pval<=0.3)
topBP$pathway = rownames(topBP)
tabBP1 <- tabBP[match(rownames(topBP), rownames(tabBP)),]
tabBP1 <- tabBP1[1:10,]
rownames(tabBP1) <- gsub("KEGG", "",rownames(tabBP1))
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1))
rownames(tabBP) <- gsub("KEGG", "",rownames(tabBP))
rownames(tabBP) <- gsub("_", " ",rownames(tabBP))
tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio)
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 12)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("KEGG", "",rownames(tabCP1))
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1))
rownames(tabCP) <- gsub("KEGG", "",rownames(tabCP))
rownames(tabCP) <- gsub("_", " ",rownames(tabCP))
Top 10 pathway CWM vs NAWM
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO KEGG pathway database 2021") + xlab("") + coord_flip() + theme_bw()
Top 10 pathway NAWM vs WML
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO KEGG pathway database 2021") + coord_flip()+ theme_bw()
Top 10 pathway CWM vs WML
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO KEGG pathway database 2021") + coord_flip()+theme_bw()
---
title: "Bulk analysis of MS lesions"
output: html_notebook
---

This is an analysis performed on the bulk RNA-Seq dataset

The workflow is as follows:  
1. Inspect variance in the dataset (Filtering, Normalization, PCA)  
2. Perform differential expression analysis (Using limma)  
3. Annotate biological signatures found by DEA (GSEA)  

```{r  echo=TRUE, message=FALSE, warning=FALSE}
library(edgeR)
library(fgsea)
library(qusage)
library(DESeq2)
library(org.Hs.eg.db)
library(ggplot2)
library(enrichR)
library(VennDiagram)
library(ggpubr)
library(ggrepel)
library(PCAtools)
library(RColorBrewer)
library(ggiraph)
library(UpSetR)
library(rhandsontable)
library(nloptr)
library(ggVennDiagram)
library(BiocManager)
library(stringr)
library(knitr)
```
### Counts list
```{r echo=TRUE, message=FALSE, warning=FALSE, results='asis'}
dat = read.csv('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/data/merged_counts.apr2022.csv',row.names=1)
knitr::kable(dat[1:6, 1:12], caption ="A Knitr kable")
```

### Meta data
```{r echo=TRUE, message=FALSE, warning=FALSE, results='asis'}
meta = read.table('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/data/meta_data.apr2022.csv',row.names=1,sep = ",")
names(meta)[names(meta) == "type"] <- "lesion_type"
meta$sex = 'M'
meta[which(meta$sample=="AB203"),]$sex = 'F'
meta.tmp <- meta[c(2,7,10,11,12,13)]
knitr::kable(meta.tmp[1:8,], caption ="A Knitr kable")
```


Define functions for the analysis:
- find_enrichments(): For a given comparison (DEA) extract enrichments for GO(BP), GO(MF), KEGG and TFT  
- fisher_enrichment(): Perform an enrichment of genesets using a fisher test approach 
- plot.gene(): Plot gene expression for a given gene across conditions. 

The gmt files contain the genesets for *GSEA*

```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE}
go = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c5.go.bp.v7.5.1.symbols.gmt')
mf = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c5.go.mf.v7.5.1.symbols.gmt')
kegg = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c2.cp.kegg.v7.5.1.symbols.gmt')
tft = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c3.tft.v7.5.1.symbols.gmt')

sign_cols=c(DN='deepskyblue',NS='grey80',UP='tomato')

plot.gene <- function(x){
  gene_id = rownames(subset(resA,gene_name==x))[1]
  meta$gene = y$E[gene_id,rownames(meta)]
  ggplot(subset(meta,lesion_type!="Mixed"),aes(lesion_type,gene))+geom_boxplot()+geom_point(aes(col=sample))+theme_bw()+ ggtitle(x)
}

# plot.geneP("SOD2",cols=con_colorsP)
plot.geneP <- function(symbol,x='group',cols){
  
  geneid = subset(convP,SYMBOL==symbol)$ENSEMBL
  meta.tmp = metadataP
  meta.tmp$expr = EP$E[geneid,rownames(meta.tmp)]
  meta.tmp$group <- factor(meta.tmp$group , levels=c('CONTROL','NAWM','LESION'))
  pl = ggplot(meta.tmp,aes(group,expr,fill=group))+geom_boxplot(width=.75,size=.75)+theme_bw()+scale_fill_manual(values=cols)+
    labs(title = symbol, x = "Group", y = "Expr") + xlab("") + ylab("")
  
  return(pl)
}

fisher_enrichment<-function(cluster_markers,pathway,universe,pathway_name){
enrichment_tables=list() 
  for(cluster in 1:length(cluster_markers)){
    cluster_name=names(cluster_markers)[cluster]
    output <- lapply(pathway, function(x) {
    freq.table <- table(factor(universe %in% as.character(cluster_markers[[cluster]]), 
                          levels = c(TRUE,FALSE)), 
                    	  factor(universe %in% x, 
                          levels = c(TRUE,FALSE)))

      fit <- fisher.test(freq.table, alternative = "greater")
      interSection <- intersect(cluster_markers[[cluster]], x)
      interSection <- paste(interSection, collapse = ",")
      return(value = c(NOM_pval = fit$p.value, INTERSECT = interSection,"OR"=fit$estimate))})
    
    term_names=character()
    for (pathway.term in 1:length(output)){
      term_names[pathway.term]=pathway[[pathway.term]][1]
    }
  
    results=data.frame(do.call("rbind",output))
    results$fdr=p.adjust(as.numeric(as.character(results$NOM_pval)),method = "BH")
    results=results[order(results$fdr),]
    enrichment_tables[[cluster_name]]=results
  }

  return(enrichment_tables)
}

find_enrichments<-function(resA,n=5,show='both'){
  resA.t = resA$t
  names(resA.t)=resA$gene_name 
  if(show=='up'){
    resA.bp = subset(fgsea(go,resA.t),NES>0)
    resA.mf = subset(fgsea(mf,resA.t),NES>0)
    resA.kegg= subset(fgsea(kegg,resA.t),NES>0)
    resA.tft= subset(fgsea(tft,resA.t),NES>0)
  }else if(show=='down'){
    resA.bp = subset(fgsea(go,resA.t),NES<0)
    resA.mf = subset(fgsea(mf,resA.t),NES<0)
    resA.kegg= subset(fgsea(kegg,resA.t),NES<0)   
    resA.tft= subset(fgsea(tft,resA.t),NES<0)
    
  }else{
    resA.bp = fgsea(go,resA.t)
    resA.mf = fgsea(mf,resA.t)
    resA.kegg= fgsea(kegg,resA.t)   
    resA.tft= subset(fgsea(tft,resA.t))
  }
  
  resA.bp$GO = 'BP'
  resA.mf$GO = 'MF'
  resA.kegg$GO = 'KEGG'
  out=list(enr=list(bp=resA.bp,mf=resA.mf,kegg=resA.kegg,tft=resA.tft))
  
  enr = rbind(
    head(resA.bp[order(resA.bp$padj),],n),
    head(resA.mf[order(resA.mf$padj),],n),
    head(resA.kegg[order(resA.kegg$padj),],n)
  )
  enr=data.frame(enr)
  enr$pathway=gsub('_',' ',gsub('GO_|KEGG_','',enr$pathway))
  enr$pathway=factor(enr$pathway,levels=as.character(enr[order(enr$GO,enr$NES),'pathway']))
  p=ggplot(enr,aes(pathway,NES,fill=GO))+geom_col(width=.6,alpha=.6)+geom_col(width=.6,fill=NA,aes(col=GO))+coord_flip()+theme_bw()+geom_vline(xintercept=c(n,n*2)+.5,linetype='dashed')+geom_hline(yintercept=0)
  out$plot=p
  return(out)
}

##top pathway
topPath<- function(fish ,specific, p ,n){
  l=n*20
  result <- matrix(NA, l, l)
  
  colnames(result) <- rownames(result) <- rownames(getElement(fish, specific))[1:l]
  for(i in 1:l){
    for(j in 1:l){
      minlist = 0
      if(length(unlist(strsplit(fish[[specific]][["INTERSECT"]][i], split = ",")))>=length(unlist(strsplit(fish[[specific]][["INTERSECT"]][j], split = ",")))) minlist=length(unlist(strsplit(fish[[specific]][["INTERSECT"]][j], split = ","))) 
      else minlist=length(unlist(strsplit(fish[[specific]][["INTERSECT"]][i], split = ",")))
      result[i, j] <- length(intersect(unlist(strsplit(fish[[specific]][["INTERSECT"]][i], split = ",")), unlist(strsplit(fish[[specific]][["INTERSECT"]][j], split = ","))))/(minlist)      
      result[j, i] <- result[i, j]     
    }
  }
  listBPspecific = list(c(rownames(result)[1]))
  i=1
  while(i < l){
    j=i
    while(j < l){
      if(result[i, j] < p){ 
        i=j
        listBPspecific <- c(listBPspecific, rownames(result)[j])
        j=l
      }else {
        j=j+1
      }
    }
    if(length(listBPspecific)>n-1) 
      i=l
  }
  return(listBPspecific)
}
```

## Differential expression analysis with limma-voom

### Filtering to remove lowly expressed genes + Normalization for composition bias

use the *edgeR/limma* workflow to filter out lowly expressed genes. *Voom* is used to normalize the raw counts. A linear mixed model is used to fit the expression values. Defining the model is done with the model.matrix function of edgeR. In the model are included factors of interest (group). If a batch/patient effect is detected, it can be added to the model to account for its effect.

```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
d0 <- DGEList(dat[-1])
d0 <- calcNormFactors(d0)
cutoff <- 2
drop <- which(apply(cpm(d0), 1, max) < cutoff)
d <- d0[-drop,]

mm <- model.matrix(~ 0 + lesion_type+batch+sample, data=meta[colnames(d),])

# Without filtering low expressed genes
y <- voom(d0[,rownames(meta)], mm, plot = T)

# Filtering low expressed genes
y <- voom(d[,rownames(meta)], mm, plot = T)

##gene name reference(ref_G)
conv_B=AnnotationDbi::select(org.Hs.eg.db,keys=rownames(y$E),keytype='ENSEMBL',columns=c('GENENAME','SYMBOL'))
ref_G=conv_B[!duplicated(conv_B[,1]),]
rownames(ref_G)=ref_G[,1]
##
```

## PCA {.tabset}
Perform a *PCA* on the normalized samples and identify main factors of variation. 

### Data including batch effect
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 7, fig.height = 6}
BD <- pca(y$E, metadata = meta)
biplot(BD, colby = 'batch',hline = 0, vline = 0,legendPosition = 'right', lab="",title = "Data including batch effect + patient effect")
```

### Only Batch effect removed from data (color by patient)
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 7, fig.height = 6}
BDCorrB <- removeBatchEffect(y$E,batch = meta$batch)
BD1B <- pca(BDCorrB, metadata = meta)
biplot(BD1B, colby = 'sample',shape = 'sex', hline = 0, vline = 0,legendPosition = 'right', lab="",title="Only Batch effect removed from data")
```


### Only Batch effect removed from data (color by batch)
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 7, fig.height = 6}
biplot(BD1B, colby = 'batch',hline = 0, vline = 0,legendPosition = 'right', lab="",title="Only Batch effect removed from data")
```

### Patient+Batch effect removed from data (color by sample)
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 7, fig.height = 6}
BDCorrBS <- removeBatchEffect(y$E,batch = meta$batch, batch2 = meta$sample)
BD1BS <- pca(BDCorrBS, metadata = meta)
biplot(BD1BS, colby = 'sample',hline = 0, vline = 0,legendPosition = 'right', lab="",title="Batch+Patient effect removed from data")
```

### Patient+Batch effect removed from data (color by lesion_type and sex)
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 7, fig.height = 6}
biplot(BD1BS, colby = 'lesion_type',shape = 'sex',hline = 0, vline = 0,legendPosition = 'right', lab="",title="Batch+Patient effect removed from data")
```


##  Specify Contrast(s) of interest

Use a linear model to fit voomed counts (logCPM). Instanciate the comparisons of interest by using the contrast approach from *limma*. Extract the results, annotate the tables and merge them into a global result dataframe. 

```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE}
fit <- lmFit(y, mm)
contr <- makeContrasts(
  A = lesion_typeWML - lesion_typeNAWM,
  B = lesion_typeWML - lesion_typeCL,
  C = lesion_typeCL - lesion_typeNAGM,
  D = lesion_typeNAWM - lesion_typeNAGM,
  E = lesion_typePVML - lesion_typeNAWM,
  F = lesion_typePVML - lesion_typeWML,
  G = lesion_typePVML - lesion_typeCL,
  levels = colnames(coef(fit)))
tmp <- contrasts.fit(fit, contr)
tmp <- eBayes(tmp)

resA <- topTable(tmp, sort.by = "P", n = Inf,coef='A')
resB <- topTable(tmp, sort.by = "P", n = Inf,coef='B')
resC <- topTable(tmp, sort.by = "P", n = Inf,coef='C')
resD <- topTable(tmp, sort.by = "P", n = Inf,coef='D')
resE <- topTable(tmp, sort.by = "P", n = Inf,coef='E')
resF <- topTable(tmp, sort.by = "P", n = Inf,coef='F')
resG <- topTable(tmp, sort.by = "P", n = Inf,coef='G')
```


## Comparisons between conditions (Volcano plot) {.tabset}

Plot results in the form of volcano plots. Thresholds used are adj.P.Val<0.05 & |logFC|>1. Red genes are upregulated in the first group as found in the subtitle of each graph. 

### PVML_vs_NAWM
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resE$gene_id =  rownames(resE)
resE$comp = 'PVML_vs_NAWM'
resE$gene_name = ref_G[rownames(resE),3]
resE=resE[order(-abs(resE$logFC)),]
resE$show = F
resE[1:15,'show']=T
resE$sign = 'NS'
resE[which(resE$adj.P.Val<0.05&resE$logFC>1),'sign']='UP'
resE[which(resE$adj.P.Val<0.05&resE$logFC<(-1)),'sign']='DN'
ggplot(resE,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resE,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('PVML_vs_NAWM')
```

### WML_vs_NAWM
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resA$gene_id =  rownames(resA)
resA$comp = 'WML_vs_NAWM'
resA$gene_name = ref_G[rownames(resA),3]
resA=resA[order(-abs(resA$logFC)),]
resA$show = F
resA[1:15,'show']=T
resA$sign = 'NS'
resA[which(resA$adj.P.Val<0.05&resA$logFC>1),'sign']='UP'
resA[which(resA$adj.P.Val<0.05&resA$logFC<(-1)),'sign']='DN'
ggplot(resA,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resA,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('WML_vs_NAWM')
```

### WML_vs_CL
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resB$gene_id =  rownames(resB)
resB$comp = 'WML_vs_CL'
resB$gene_name = ref_G[rownames(resB),3]
resB=resB[order(-abs(resB$logFC)),]
resB$show = F
resB[1:15,'show']=T
resB$sign = 'NS'
resB[which(resB$adj.P.Val<0.05&resB$logFC>1),'sign']='UP'
resB[which(resB$adj.P.Val<0.05&resB$logFC<(-1)),'sign']='DN'
ggplot(resB,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resB,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('WML_vs_CL')
```

### CL_vs_NAGM
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resC$gene_id =  rownames(resC)
resC$comp = 'CL_vs_NAGM'
resC$gene_name = ref_G[rownames(resC),3]
resC=resC[order(-abs(resC$logFC)),]
resC$show = F
resC[1:15,'show']=T
resC$sign = 'NS'
resC[which(resC$adj.P.Val<0.05&resC$logFC>1),'sign']='UP'
resC[which(resC$adj.P.Val<0.05&resC$logFC<(-1)),'sign']='DN'
ggplot(resC,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resC,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('CL_vs_NAGM')
```

### NAWM_vs_NAGM
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resD$gene_id =  rownames(resD)
resD$comp = 'NAWM_vs_NAGM'
resD$gene_name = ref_G[rownames(resD),3]
resD=resD[order(-abs(resD$logFC)),]
resD$show = F
resD[1:15,'show']=T
resD$sign = 'NS'
resD[which(resD$adj.P.Val<0.05&resD$logFC>1),'sign']='UP'
resD[which(resD$adj.P.Val<0.05&resD$logFC<(-1)),'sign']='DN'
ggplot(resD,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resD,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('NAWM_vs_NAGM')
```

### PVML_vs_WML
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resF$gene_id =  rownames(resF)
resF$comp = 'PVML_vs_WML'
resF$gene_name = ref_G[rownames(resF),3]
resF=resF[order(-abs(resF$logFC)),]
resF$show = F
resF[1:15,'show']=T
resF$sign = 'NS'
resF[which(resF$adj.P.Val<0.05&resF$logFC>1),'sign']='UP'
resF[which(resF$adj.P.Val<0.05&resF$logFC<(-1)),'sign']='DN'
ggplot(resF,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resF,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('PVML_vs_WML')
```

### PVML_vs_CL
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 4, fig.height = 3}
resG$gene_id =  rownames(resG)
resG$comp = 'PVML_vs_CL'
resG$gene_name = ref_G[rownames(resG),3]
resG=resG[order(-abs(resG$logFC)),]
resG$show = F
resG[1:15,'show']=T
resG$sign = 'NS'
resG[which(resG$adj.P.Val<0.05&resG$logFC>1),'sign']='UP'
resG[which(resG$adj.P.Val<0.05&resG$logFC<(-1)),'sign']='DN'
ggplot(resG,aes(logFC,-log10(P.Value),col=sign))+geom_point_interactive(aes(x=logFC,y=-log10(P.Value),color=sign,tooltip = gene_name,data_id = gene_name))+theme_bw()+scale_color_manual(values=sign_cols)+ geom_label_repel(data=subset(resG,show==T),aes(label=gene_name),col='black',size=3,force=30,fill = alpha(c("white"),0.5))+ggtitle('PVML_vs_CL')
```

Boxplots of select genes demonstrating significant differential gene expression.

```{r, fig.width = 8, fig.height = 7}
CIBAR2 <- plot.gene("CIBAR2")
DNAH5 <- plot.gene("DNAH5")
irf1 <- plot.gene("IRF1")
LRRIQ1 <- plot.gene("LRRIQ1")
icam1 <- plot.gene("ICAM1")
ODAD2 <- plot.gene("ODAD2")
vcam1 <- plot.gene("VCAM1")
CD24 <- plot.gene("CD24")
dlec1 <- plot.gene("DLEC1")
cfap54 <- plot.gene("CFAP54")
fhad1 <- plot.gene("FHAD1")
dynlt5 <- plot.gene("DYNLT5")
cfap43 <- plot.gene("CFAP43")
rsph1 <- plot.gene("RSPH1")
dnai3 <- plot.gene("DNAI3")
cxcl14 <- plot.gene("CXCL14")
ggarrange(CIBAR2, DNAH5, LRRIQ1, ODAD2, irf1, CD24, icam1, dlec1, cfap54, vcam1, fhad1, dynlt5, cfap43, rsph1, dnai3, cxcl14, ncol = 4, nrow = 4,align = c("hv"), common.legend = TRUE, legend="bottom")
```

Compare fold-changes across conditions.

```{r out.width="300%",message=F,fig.width=4, out.width="30%", fig.align="default"}
df = data.frame(cbind(resE$logFC,resA[rownames(resE),'logFC'],resB[rownames(resE),'logFC'],resF[rownames(resE),'logFC']))
colnames(df)=c('WML_vs_NAWM','PVML_vs_NAWM','WML_vs_CL','PVML_vs_WML')

p1=ggplot(df,aes(WML_vs_NAWM,PVML_vs_NAWM))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p2=ggplot(df,aes(WML_vs_NAWM,WML_vs_CL))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p3=ggplot(df,aes(WML_vs_NAWM,PVML_vs_WML))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p4=ggplot(df,aes(PVML_vs_NAWM,WML_vs_CL))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p5=ggplot(df,aes(PVML_vs_NAWM,PVML_vs_WML))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')
p6=ggplot(df,aes(WML_vs_CL,PVML_vs_WML))+geom_point()+theme_bw()+stat_cor()+geom_smooth(method='lm')+ggtitle('Fold-Changes Correlation')

p1
p2
p3
p4
p5
p6

```



## Gene ontology term enrichment analysis of comparisons between conditions  {.tabset}

This extracts biological signatures from the transcriptomic changes found in the differential expression analysis

```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE}
enr.a=find_enrichments(resA)
enr.b=find_enrichments(resB)
enr.c=find_enrichments(resC)
enr.d=find_enrichments(resD)
enr.e=find_enrichments(resE)
enr.f=find_enrichments(resF)
enr.g=find_enrichments(resG)
```


### Biological Processes
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width =10, fig.height = 9}
enrA = enr.a$enr$bp
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)

enrB = enr.b$enr$bp
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)

enrC = enr.c$enr$bp
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)

enrD = enr.d$enr$bp
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)

enrE = enr.e$enr$bp
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)

enrF = enr.f$enr$bp
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)

enrG = enr.g$enr$bp
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)

paths = c(head(enrA,10)$pathway,
          head(enrB,10)$pathway,
          head(enrC,10)$pathway,
          head(enrD,10)$pathway,
          head(enrE,10)$pathway,
          head(enrF,10)$pathway,
          head(enrG,10)$pathway )

paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("GOBP", "",rb$pathway)
rb$pathway <- gsub("_", " ",rb$pathway)
ss  = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))
```

### Molecular Function
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 9}
enrA = enr.a$enr$mf
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)

enrB = enr.b$enr$mf
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)

enrC = enr.c$enr$mf
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)

enrD = enr.d$enr$mf
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)

enrE = enr.e$enr$mf
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)

enrF = enr.f$enr$mf
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)

enrG = enr.g$enr$mf
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)

paths = c(head(enrA,10)$pathway,
          head(enrB,10)$pathway,
          head(enrC,10)$pathway,
          head(enrD,10)$pathway,
          head(enrE,10)$pathway,
          head(enrF,10)$pathway,
          head(enrG,10)$pathway )

paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("GOMF", "",rb$pathway)
rb$pathway <- gsub("_", " ",rb$pathway)
ss  = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))
```


### Transcription factor targets
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 9}
enrA = enr.a$enr$tft
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)

enrB = enr.b$enr$tft
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)

enrC = enr.c$enr$tft
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)

enrD = enr.d$enr$tft
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)

enrE = enr.e$enr$tft
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)

enrF = enr.f$enr$tft
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)

enrG = enr.g$enr$tft
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)

paths = c(head(enrA,10)$pathway,
          head(enrB,10)$pathway,
          head(enrC,10)$pathway,
          head(enrD,10)$pathway,
          head(enrE,10)$pathway,
          head(enrF,10)$pathway,
          head(enrG,10)$pathway )

paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("_", " ",rb$pathway)
ss  = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))
```


### KEGG pathway database
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 9}
enrA = enr.a$enr$kegg
enrA=enrA[order(enrA$padj),]
enrA$comp = unique(resA$comp)

enrB = enr.b$enr$kegg
enrB=enrB[order(enrB$padj),]
enrB$comp = unique(resB$comp)

enrC = enr.c$enr$kegg
enrC=enrC[order(enrC$padj),]
enrC$comp = unique(resC$comp)

enrD = enr.d$enr$kegg
enrD=enrD[order(enrD$padj),]
enrD$comp = unique(resD$comp)

enrE = enr.e$enr$kegg
enrE=enrE[order(enrE$padj),]
enrE$comp = unique(resE$comp)

enrF = enr.f$enr$kegg
enrF=enrF[order(enrF$padj),]
enrF$comp = unique(resF$comp)

enrG = enr.g$enr$kegg
enrG=enrG[order(enrG$padj),]
enrG$comp = unique(resG$comp)

paths = c(head(enrA,10)$pathway,
          head(enrB,10)$pathway,
          head(enrC,10)$pathway,
          head(enrD,10)$pathway,
          head(enrE,10)$pathway,
          head(enrF,10)$pathway,
          head(enrG,10)$pathway )

paths=unique(paths)
rb=rbind(enrA[which(enrA$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrB[which(enrB$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrC[which(enrC$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrD[which(enrD$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrE[which(enrE$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrF[ which(enrF$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')],
         enrG[which(enrG$pathway%in%paths),c('pval','leadingEdge','NES','comp','pathway')])
rb[which(rb$padj>=0.1),'NES']=NA
rb$pathway <- gsub("KEGG", "",rb$pathway)
rb$pathway <- gsub("_", " ",rb$pathway)
ss  = strsplit(as.character(rb$leadingEdge),',')
rb$n_genes = unlist(lapply(ss,length))
rb$pval <- as.numeric(rb$pval)
rb[which(rb$pval>=0.1),'pval']=NA
ggplot(na.omit(rb),aes(pathway,comp,fill=NES))+geom_point(aes(size=pval),shape=21)+scale_x_discrete(label = function(x) stringr::str_trunc(x,50))+theme_bw()+coord_flip()+theme(axis.text.x=element_text( angle=60,hjust=1))+scale_fill_gradientn(colors=rev(brewer.pal(11,'RdYlBu')))+ scale_size(trans = 'reverse')+ theme(axis.text = element_text(face="bold"))

```


# Plot the overlap between DEGs with an upset plot/Venn Diagram

## Overlap of DEGs in comparaisons {.tabset}
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE}
res=rbind(resA,resB,resC,resD,resE,resF,resG)
res= na.omit(res)

degsListvs = list(
  WML_vs_NAWM=subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
    PVML_vs_WML=subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
    PVML_vs_NAWM=subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
    WML_vs_CL=subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name,
    NAWM_vs_NAGM=subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)

degsListUP = list(
  WML_vs_NAWM=subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
    PVML_vs_WML=subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
    PVML_vs_NAWM=subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
    WML_vs_CL=subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & (logFC)>1)$gene_name,
    NAWM_vs_NAGM=subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & (logFC)>1)$gene_name)

degsListDOWN = list(
  WML_vs_NAWM=subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
    PVML_vs_WML=subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
    PVML_vs_NAWM=subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
    WML_vs_CL=subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & (logFC)<1)$gene_name,
    NAWM_vs_NAGM=subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & (logFC)<1)$gene_name)

```


### UpSet plot
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 8, fig.height = 6,fig.keep = "last"}
upset(fromList(degsListvs),sets = c('WML_vs_NAWM','PVML_vs_WML','PVML_vs_NAWM',"WML_vs_CL","NAWM_vs_NAGM"),order.by='freq',decreasing = c(F),text.scale = c(1.3, 2, 1.5, 1.5, 2, 2))
#upset(fromList(degsListUP),sets = c('WML_vs_NAWM','PVML_vs_WML','PVML_vs_NAWM',"WML_vs_CL","NAWM_vs_NAGM"),order.by='freq',decreasing = c(F),text.scale = c(1.3, 2, 1.5, 1.5, 2, 2))
#upset(fromList(degsListDOWN),sets = c('WML_vs_NAWM','PVML_vs_WML','PVML_vs_NAWM',"WML_vs_CL","NAWM_vs_NAGM"),order.by='freq',decreasing = c(F),text.scale = c(1.3, 2, 1.5, 1.5, 2, 2))
```

### Venn Diagram
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 8, fig.height = 6}
#label = "count"
ggVennDiagram(degsListvs,label = "percent", label_alpha = 0)+ scale_fill_distiller(palette = "Reds", direction = 1)
```

# Gene Ontology enrichment analysis of overlapping genes in comparisons between conditions(The top 5 specific Genesets from the upsets plot) {.tabset} 

```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE}
WML_vs_NAWM=na.omit(subset(res,comp=='WML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
PVML_vs_WML=na.omit(subset(res,comp=='PVML_vs_WML' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
PVML_vs_NAWM=na.omit(subset(res,comp=='PVML_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
CL_vs_NAWM=na.omit(subset(res,comp=='CL_vs_NAWM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
WML_vs_CL=na.omit(subset(res,comp=='WML_vs_CL' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)
NAWM_vs_NAGM=na.omit(subset(res,comp=='NAWM_vs_NAGM' & adj.P.Val<0.1 & abs(logFC)>1)$gene_name)

specificWML_vs_CL = as.character(WML_vs_CL[!WML_vs_CL%in%WML_vs_NAWM & !WML_vs_CL%in%PVML_vs_WML & !WML_vs_CL%in%PVML_vs_NAWM & !WML_vs_CL%in%CL_vs_NAWM & !WML_vs_CL%in%NAWM_vs_NAGM])

specificPVML_vs_NAWM = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & !PVML_vs_NAWM%in%PVML_vs_WML & !PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])

commonPVML_vs_NAWM.PVML_vs_WML = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & PVML_vs_NAWM%in%PVML_vs_WML & !PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])

commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & PVML_vs_NAWM%in%PVML_vs_WML & PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])
  
commonPVML_vs_NAWM.WML_vs_CL = as.character(PVML_vs_NAWM[!PVML_vs_NAWM%in%WML_vs_NAWM & !PVML_vs_NAWM%in%PVML_vs_WML & PVML_vs_NAWM%in%WML_vs_CL & !PVML_vs_NAWM%in%CL_vs_NAWM & !PVML_vs_NAWM%in%NAWM_vs_NAGM])

list = list(commonPVML_vs_NAWM.PVML_vs_WML=commonPVML_vs_NAWM.PVML_vs_WML, specificWML_vs_CL=specificWML_vs_CL,specificPVML_vs_NAWM=specificPVML_vs_NAWM, commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, commonPVML_vs_NAWM.WML_vs_CL=commonPVML_vs_NAWM.WML_vs_CL )

fishBP = fisher_enrichment(list,go,unique(ref_G$SYMBOL))
fishMF = fisher_enrichment(list,mf,unique(ref_G$SYMBOL))
fishTFT = fisher_enrichment(list,tft,unique(ref_G$SYMBOL))
fishKEGG = fisher_enrichment(list,kegg,unique(ref_G$SYMBOL))
```

## biological processes {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
  enr.specificWML_vs_CL = fishBP$specificWML_vs_CL
  enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
  enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr) 
  enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio) 
  enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
  topspecificWML_vs_CL <- topPath(fishBP ,"specificWML_vs_CL", 0.5, 20)
  enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
  rownames(enr.specificWML_vs_CL) <- gsub("GOBP_", "",rownames(enr.specificWML_vs_CL)) 
  enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
  enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
  
  enr.specificPVML_vs_NAWM = fishBP$specificPVML_vs_NAWM
  enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
  enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr) 
  enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio) 
  enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
  topspecificPVML_vs_NAWM <- topPath(fishBP ,"specificPVML_vs_NAWM", 0.5, 20)
  enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
  rownames(enr.specificPVML_vs_NAWM) <- gsub("GOBP_", "",rownames(enr.specificPVML_vs_NAWM)) 
  enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
  enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML = fishBP$commonPVML_vs_NAWM.PVML_vs_WML
  enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
  enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML <- topPath(fishBP ,"commonPVML_vs_NAWM.PVML_vs_WML", 0.5, 20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
  rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML) <- gsub("GOBP_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishBP$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishBP ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
  rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL) <- gsub("GOBP_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"

  enr.commonPVML_vs_NAWM.WML_vs_CL = fishBP$commonPVML_vs_NAWM.WML_vs_CL
  enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
  enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.WML_vs_CL <- topPath(fishBP ,"commonPVML_vs_NAWM.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
  rownames(enr.commonPVML_vs_NAWM.WML_vs_CL) <- gsub("GOBP_", "",rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)) 
  enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"  

```


### Specific WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Specific PVML_vs_NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Common PVML_vs_NAWM & PVML_vs_WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Common PVML_vs_NAWM & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

## Molecular Function {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
  enr.specificWML_vs_CL = fishMF$specificWML_vs_CL
  enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
  enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr) 
  enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio) 
  enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
  topspecificWML_vs_CL <- topPath(fishMF ,"specificWML_vs_CL", 0.5, 20)
  enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
  rownames(enr.specificWML_vs_CL) <- gsub("GOMF_", "",rownames(enr.specificWML_vs_CL)) 
  enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
  enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
  
  enr.specificPVML_vs_NAWM = fishMF$specificPVML_vs_NAWM
  enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
  enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr) 
  enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio) 
  enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
  topspecificPVML_vs_NAWM <- topPath(fishMF ,"specificPVML_vs_NAWM", 0.5, 20)
  enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
  rownames(enr.specificPVML_vs_NAWM) <- gsub("GOMF_", "",rownames(enr.specificPVML_vs_NAWM)) 
  enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
  enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML = fishMF$commonPVML_vs_NAWM.PVML_vs_WML
  enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
  enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML <- topPath(fishMF ,"commonPVML_vs_NAWM.PVML_vs_WML", 0.5, 20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
  rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML) <- gsub("GOMF_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishMF$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishMF ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
  rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL) <- gsub("GOMF_", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"

  enr.commonPVML_vs_NAWM.WML_vs_CL = fishMF$commonPVML_vs_NAWM.WML_vs_CL
  enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
  enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.WML_vs_CL <- topPath(fishMF ,"commonPVML_vs_NAWM.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
  rownames(enr.commonPVML_vs_NAWM.WML_vs_CL) <- gsub("GOMF_", "",rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)) 
  enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"  

```


### Specific WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Specific PVML_vs_NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Common PVML_vs_NAWM & PVML_vs_WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

### Common PVML_vs_NAWM & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```

## Transcription factor targets {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
  enr.specificWML_vs_CL = fishTFT$specificWML_vs_CL
  enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
  enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr) 
  enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio) 
  enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
  topspecificWML_vs_CL <- topPath(fishTFT ,"specificWML_vs_CL", 0.5, 20)
  enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
  enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
  enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
  
  enr.specificPVML_vs_NAWM = fishTFT$specificPVML_vs_NAWM
  enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
  enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr) 
  enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio) 
  enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
  topspecificPVML_vs_NAWM <- topPath(fishTFT ,"specificPVML_vs_NAWM", 0.5, 20)
  enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
  enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
  enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML = fishTFT$commonPVML_vs_NAWM.PVML_vs_WML
  enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
  enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML <- topPath(fishTFT ,"commonPVML_vs_NAWM.PVML_vs_WML", 0.5, 20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishTFT$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishTFT ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"

  enr.commonPVML_vs_NAWM.WML_vs_CL = fishTFT$commonPVML_vs_NAWM.WML_vs_CL
  enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
  enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.WML_vs_CL <- topPath(fishTFT ,"commonPVML_vs_NAWM.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
  enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"  

```


### Specific WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
```

### Specific PVML_vs_NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()
```

### Common PVML_vs_NAWM & PVML_vs_WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()
```

### Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
```

### Common PVML_vs_NAWM & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
```

## KEGG pathway database {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
  enr.specificWML_vs_CL = fishKEGG$specificWML_vs_CL
  enr.specificWML_vs_CL$pathway = rownames(enr.specificWML_vs_CL)
  enr.specificWML_vs_CL$fdr <- as.numeric(enr.specificWML_vs_CL$fdr) 
  enr.specificWML_vs_CL$OR.odds.ratio <- as.numeric(enr.specificWML_vs_CL$OR.odds.ratio) 
  enr.specificWML_vs_CL$NOM_pval <- as.numeric(enr.specificWML_vs_CL$NOM_pval)
  topspecificWML_vs_CL <- topPath(fishKEGG ,"specificWML_vs_CL", 0.5, 20)
  enr.specificWML_vs_CL <- enr.specificWML_vs_CL[match(topspecificWML_vs_CL, rownames(enr.specificWML_vs_CL)),]
  rownames(enr.specificWML_vs_CL) <- gsub("KEGG", "",rownames(enr.specificWML_vs_CL)) 
  enr.specificWML_vs_CL=enr.specificWML_vs_CL[order(enr.specificWML_vs_CL$NOM_pval),]
  enr.specificWML_vs_CL$comp = "specificWML_vs_CL"
  
  enr.specificPVML_vs_NAWM = fishKEGG$specificPVML_vs_NAWM
  enr.specificPVML_vs_NAWM$pathway = rownames(enr.specificPVML_vs_NAWM)
  enr.specificPVML_vs_NAWM$fdr <- as.numeric(enr.specificPVML_vs_NAWM$fdr) 
  enr.specificPVML_vs_NAWM$OR.odds.ratio <- as.numeric(enr.specificPVML_vs_NAWM$OR.odds.ratio) 
  enr.specificPVML_vs_NAWM$NOM_pval <- as.numeric(enr.specificPVML_vs_NAWM$NOM_pval)
  topspecificPVML_vs_NAWM <- topPath(fishKEGG ,"specificPVML_vs_NAWM", 0.5, 20)
  enr.specificPVML_vs_NAWM <- enr.specificPVML_vs_NAWM[match(topspecificPVML_vs_NAWM, rownames(enr.specificPVML_vs_NAWM)),]
  rownames(enr.specificPVML_vs_NAWM) <- gsub("KEGG", "",rownames(enr.specificPVML_vs_NAWM)) 
  enr.specificPVML_vs_NAWM=enr.specificPVML_vs_NAWM[order(enr.specificPVML_vs_NAWM$NOM_pval),]
  enr.specificPVML_vs_NAWM$comp = "specificPVML_vs_NAWM"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML = fishKEGG$commonPVML_vs_NAWM.PVML_vs_WML
  enr.commonPVML_vs_NAWM.PVML_vs_WML$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)
  enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval)
  enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
  topcommonPVML_vs_NAWM.PVML_vs_WML = subset(enr.commonPVML_vs_NAWM.PVML_vs_WML,NOM_pval<=0.1)
  topcommonPVML_vs_NAWM.PVML_vs_WML = topcommonPVML_vs_NAWM.PVML_vs_WML$pathway
  enr.commonPVML_vs_NAWM.PVML_vs_WML <- enr.commonPVML_vs_NAWM.PVML_vs_WML[match(topcommonPVML_vs_NAWM.PVML_vs_WML, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)),]
  rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML) <- gsub("KEGG", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML)) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML=enr.commonPVML_vs_NAWM.PVML_vs_WML[order(enr.commonPVML_vs_NAWM.PVML_vs_WML$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML$comp = "commonPVML_vs_NAWM & PVML_vs_WML"
  
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL = fishKEGG$commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- topPath(fishKEGG ,"commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL", 0.5,20)
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL <- enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[match(topcommonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)),]
  rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL) <- gsub("KEGG", "",rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL)) 
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL=enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL[order(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML & WML_vs_CL"

  enr.commonPVML_vs_NAWM.WML_vs_CL = fishKEGG$commonPVML_vs_NAWM.WML_vs_CL
  enr.commonPVML_vs_NAWM.WML_vs_CL$pathway = rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)
  enr.commonPVML_vs_NAWM.WML_vs_CL$fdr <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$fdr) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$OR.odds.ratio) 
  enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval <- as.numeric(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval)
  topcommonPVML_vs_NAWM.WML_vs_CL = subset(enr.commonPVML_vs_NAWM.WML_vs_CL,NOM_pval<=0.1)
  topcommonPVML_vs_NAWM.WML_vs_CL = topcommonPVML_vs_NAWM.WML_vs_CL$pathway
  enr.commonPVML_vs_NAWM.WML_vs_CL <- enr.commonPVML_vs_NAWM.WML_vs_CL[match(topcommonPVML_vs_NAWM.WML_vs_CL, rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)),]
  rownames(enr.commonPVML_vs_NAWM.WML_vs_CL) <- gsub("KEGG", "",rownames(enr.commonPVML_vs_NAWM.WML_vs_CL)) 
  enr.commonPVML_vs_NAWM.WML_vs_CL=enr.commonPVML_vs_NAWM.WML_vs_CL[order(enr.commonPVML_vs_NAWM.WML_vs_CL$NOM_pval),]
  enr.commonPVML_vs_NAWM.WML_vs_CL$comp = "commonPVML_vs_NAWM & PVML_vs_WML"  

```


### Specific WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificWML_vs_CL, aes(x=reorder(rownames(enr.specificWML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
```

### Specific PVML_vs_NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.specificPVML_vs_NAWM, aes(x=reorder(rownames(enr.specificPVML_vs_NAWM),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "PVML_vs_NAWM") + xlab("") + coord_flip() + theme_bw()
```

### Common PVML_vs_NAWM & PVML_vs_WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "common PVML_vs_NAWM & PVML_vs_WML") + xlab("") + coord_flip() + theme_bw()
```

### Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.PVML_vs_WML.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & PVML_vs_WML & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
```

### common PVML_vs_NAWM & WML_vs_CL
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 10, fig.height = 8}
ggplot(enr.commonPVML_vs_NAWM.WML_vs_CL, aes(x=reorder(rownames(enr.commonPVML_vs_NAWM.WML_vs_CL),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) + geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "Common PVML_vs_NAWM & WML_vs_CL") + xlab("") + coord_flip() + theme_bw()
```


# This is an analysis performed on the bulk RNA-Seq dataset from the paper titled:   
**Brain macrophages acquire distinct transcriptomes in multiple sclerosis lesions and normal appearing white matter**.  

The authors of this paper have sequenced 68 samples of human brain material.
Here is the [paper](https://www.biorxiv.org/content/10.1101/2021.10.27.465877v1.full)

*Abstract*

Multiple sclerosis (MS) is a disease of the central nervous system (CNS) that is characterized by inflammation and focal areas of demyelination, ultimately resulting in axonal degradation and neuronal loss. Several lines of evidence point towards a role for microglia and other brain macrophages in disease initiation and progression, but exactly how lesion formation is triggered is currently unknown. Here, we characterized early changes in MS brain tissue through transcriptomic analysis of normal appearing white matter (NAWM). We found that NAWM was characterized by enriched expression of genes associated with inflammation and cellular stress derived from brain macrophages. Single cell RNA sequencing confirmed an early stress response in brain macrophages in NAWM and identified specific macrophage subsets that associate with different stages of demyelinating lesions. These early changes associated with lesion development in MS brain tissue may provide therapeutic targets to limit lesion progression and demyelination.


```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE}
goBP = read.gmt('/Users/macbookpro/OneDrive/Documents/UDEM/Master/BulkData/GO/c5.go.bp.v7.5.1.symbols.gmt')
metaP <- read.table("/Users/macbookpro/OneDrive/Documents/UDEM/Master/GSE179427/SraRunTable.txt", header = TRUE, sep = ",")
metaP <- data.frame(sample = metaP$Sample.Name, group=metaP$Group)
metaP <- metaP[!duplicated(metaP[ , c("sample", "group")]), ]
metaP$group <- toupper(metaP$group)

refP <- read.csv("/Users/macbookpro/OneDrive/Documents/UDEM/Master/GSE179427/ref.csv", header = TRUE, sep = ";")
refP <- data.frame(sample = refP$sample, sampleName = refP$sampleName)
refP <- merge(metaP,refP,by="sample")

countsP <- read.table("/Users/macbookpro/OneDrive/Documents/UDEM/Master/GSE179427/GSE179427_countmtx.csv", header = TRUE, sep = ";",row.names=1)
countsP[is.na(countsP)] <- 0
refPP <- data.frame(sampleName = colnames(countsP)) 
refPP <- merge(refPP, refP, by="sampleName")

refPP$group <- gsub('NAGM','NAWM',refPP$group)

con_colorsP = c('olivedrab2','deepskyblue','darkorange')
names(con_colorsP)=c('CONTROL','NAWM','LESION')

##remove mitochondrial gene
convP=select(org.Hs.eg.db,keys=rownames(EP$E),keytype='ENSEMBL',columns=c('GENENAME','SYMBOL'))
refGP=convP[!duplicated(convP[,1]),]
rownames(refGP)=refGP[,1]
c=subset(convP,grepl('mitochon',convP[,2]))
cc = subset(c,grepl('^MT', c[,3]))
NGMito = countsP[!rownames(countsP) %in% cc$ENSEMBL,]


## limma DEA
d0P <- DGEList(NGMito)
#d0P <- DGEList(countsP) with mitochondrial gene
d0P <- calcNormFactors(d0P)
cutoff <- 5
dropP <- which(apply(cpm(d0P), 1, max) < cutoff)
dP <- d0P[-dropP,] 


metadataP <- data.frame(sampleName=rownames(dP$samples))
metadataP$group <- NA
for (i in 1:nrow(metadataP)) {
  for (j in 1:nrow(refPP)) {
    if(metadataP$sampleName[i]==refPP$sampleName[j]){
      metadataP$group[i] = refPP$group[j]
      break
    }
    
  }
}
rownames(metadataP) <- metadataP$sampleName
metadataP <- subset(metadataP, select=-c(sampleName))

mmP <- model.matrix(~0 + group:group,data=metadataP)
EP <- voom(dP, mmP, plot = F)
```

### Principal component analysis of 68 white matter brain tissue samples.

#### 26 Control
#### 31 NAWM
#### 11 Lesion

```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
p <- pca(EP$E, metadata = metadataP)
plot1 <- biplot(p, colby = 'group',hline = 0, vline = 0,legendPosition = 'right', lab="")
plot2<- plot1  +  scale_color_manual( name= "Groups",labels = c("CONTROL","LESION","NAWM"), values= c('olivedrab2','darkorange4','goldenrod2')) 
plot2
```

## Comparisons between conditions
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE}
fitP <- lmFit(EP, mmP)
contrP <- makeContrasts(
              A= groupNAWM - groupCONTROL,
              B= groupLESION - groupNAWM,
              C= groupLESION - groupCONTROL, levels = colnames(coef(fitP)))
tmpP <- contrasts.fit(fitP,contrP)
tmpP <- eBayes(tmpP)

res.AP <- topTable(tmpP, sort.by = "P", n = Inf, coef='A',adjust="fdr")
res.BP <- topTable(tmpP, sort.by = "P", n = Inf, coef='B',adjust="fdr")
res.CP <- topTable(tmpP, sort.by = "P", n = Inf, coef='C',adjust="fdr")

res.AP$condition = 'A: NAWM vs CWM'
res.BP$condition = 'B: WML vs NAWM'
res.CP$condition = 'C: WML vs CWM'

res.AP$geneid = rownames(res.AP)
res.BP$geneid = rownames(res.BP)
res.CP$geneid = rownames(res.CP)

res.T = rbind(res.AP,res.BP,res.CP)
res.T$symbol = refGP[res.T$geneid,3]
res.AP$symbol = refGP[res.AP$geneid,3]
res.BP$symbol = refGP[res.BP$geneid,3]
res.CP$symbol = refGP[res.CP$geneid,3]

degs.AP = subset(res.T, condition == 'A: NAWM vs CWM' & adj.P.Val<0.05 & abs(logFC)>1)$symbol
degs.BP = subset(res.T, condition == 'B: WML vs NAWM' & adj.P.Val<0.05 & abs(logFC)>1)$symbol
degs.CP = subset(res.T, condition == 'C: WML vs CWM' & adj.P.Val<0.05 & abs(logFC)>1)$symbol

res.T$sign = 'NS'
res.T[which(res.T$adj.P.Val<0.05 & res.T$logFC>1),'sign']='UP'
res.T[which(res.T$adj.P.Val<0.05 & res.T$logFC<(-1)),'sign']='DN'
res.BP$sign = 'NS'
res.BP[which(res.BP$adj.P.Val<0.05 & res.BP$logFC>1),'sign']='UP'
res.BP[which(res.BP$adj.P.Val<0.05 & res.BP$logFC<(-1)),'sign']='DN'


###
res.BP<-na.omit(res.BP[order(-res.BP$logFC),])
ggplot(res.T,aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+theme_bw()+scale_color_manual(values=sign_cols)+geom_point(data=res.BP[1:10,], col="black") +geom_label_repel(data=res.BP[1:30,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) +theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))
###

res.AP<-na.omit(res.AP[order(res.AP$P.Value),])
res.BP<-na.omit(res.BP[order(res.BP$P.Value),])
res.CP<-na.omit(res.CP[order(res.CP$P.Value),])
```

### Volcano plot with the top 10 significant genes
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 6, fig.height = 4}
options(ggrepel.max.overlaps = Inf)
volcanoTop10 <- ggplot(res.T,aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+theme_bw()+scale_color_manual(values=c('deepskyblue','grey75','tomato'))+
  geom_point(data=res.AP[1:10,], col="black") +geom_label_repel(data=res.AP[1:10,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) + 
  geom_point(data=res.BP[1:10,], col="black") +geom_label_repel(data=res.BP[1:10,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) + 
  geom_point(data=res.CP[1:10,], col="black") +geom_label_repel(data=res.CP[1:10,],aes(label=symbol),force=10,size = 2,fill = alpha(c("white"),0.5)) + 
  theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+ facet_wrap(~condition)
volcanoTop10
```

### Volcano plot with genes of interest (top significant genes from Bulk);

*ICAM1,VCAM1,"DNAH5","CIBAR2","LRRIQ1","ODAD2","CD24","DLEC1","CFAP54","FHAD1","DYNLT5","CFAP43","RSPH1","DNAI3"*
```{r  echo=TRUE, message=FALSE, warning=FALSE, paged.print=TRUE, fig.width = 6, fig.height = 4}
res=res[order(res$P.Value),]

genelabelsP <- c("CD24")
geneLabelsP2 <- unique(head(resA$gene_name,30))
genelabelsP <- append(genelabelsP, geneLabelsP2)


#### put gene of the public data on our comparaison

#geneLabelsPub <- unique(head(res.BP$symbol,30))
#geneLabelsPubSignificant <- subset(res.BP,sign=="UP")
#geneLabelsPubSignificant <- unique(geneLabelsPubSignificant$symbol)

#ggplot(resA,aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=sign_cols)+ geom_point(data=resA[match(geneLabelsPub, resA$gene_name),], col="black") +geom_label_repel(data=resA[match(geneLabelsPub$symbol, resA$gene_name),],aes(label=geneLabelsPub),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("WML vs NAWM")
###

p1 = ggplot(subset(res.T,condition=="A: NAWM vs CWM"),aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=c('deepskyblue','grey75','tomato'))+ geom_point(data=res.AP[match(genelabelsP, res.AP$symbol),], col="black") +geom_label_repel(data=res.AP[match(genelabelsP, res.AP$symbol),],aes(label=genelabelsP),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("A: NAWM vs CWM")

p2 = ggplot(subset(res.T,condition=='B: WML vs NAWM'),aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=c('deepskyblue','grey75','tomato'))+ geom_point(data=res.BP[match(genelabelsP, res.BP$symbol),], col="black") +geom_label_repel(data=res.BP[match(genelabelsP, res.BP$symbol),],aes(label=genelabelsP),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("B: WML vs NAWM")

p3 = ggplot(subset(res.T,condition=='C: WML vs CWM'),aes(x=logFC,y=-log10(P.Value)))+geom_point(aes(col=sign))+ scale_color_manual(values=c('deepskyblue','grey75','tomato'))+ geom_point(data=res.CP[match(genelabelsP, res.CP$symbol),], col="black") +geom_label_repel(data=res.CP[match(genelabelsP, res.CP$symbol),],aes(label=genelabelsP),force=10,size = 3.5,fill = alpha(c("white"),0.5),fontface = 'bold')+ theme(legend.position = "none",strip.background = element_rect(fill=c("grey95")))+theme_bw()+ggtitle("C: WML vs CWM")

ggarrange(
  p1,p2,p3, common.legend = TRUE, legend = "bottom", ncol = 3)

```

## Boxplots of select genes demonstrating significant differential gene expression.

```{r, echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 8, fig.height = 7}
#CIBAR2 <- plot.geneP("CIBAR2",cols=con_colorsP)
DNAH5 <- plot.geneP("DNAH5",cols=con_colorsP)
#irf1 <- plot.geneP("IRF1",cols=con_colorsP)
LRRIQ1 <- plot.geneP("LRRIQ1",cols=con_colorsP)
icam1 <- plot.geneP("ICAM1",cols=con_colorsP)
ODAD2 <- plot.geneP("ODAD2",cols=con_colorsP)
vcam1 <- plot.geneP("VCAM1",cols=con_colorsP)
#CD24 <- plot.geneP("CD24",cols=con_colorsP)
dlec1 <- plot.geneP("DLEC1",cols=con_colorsP)
cfap54 <- plot.geneP("CFAP54",cols=con_colorsP)
fhad1 <- plot.geneP("FHAD1",cols=con_colorsP)
dynlt5 <- plot.geneP("DYNLT5",cols=con_colorsP)
cfap43 <- plot.geneP("CFAP43",cols=con_colorsP)
rsph1 <- plot.geneP("RSPH1",cols=con_colorsP)
dnai3 <- plot.geneP("DNAI3",cols=con_colorsP)
#cxcl14 <- plot.geneP("CXCL14",cols=con_colorsP)
ggarrange(DNAH5, LRRIQ1, ODAD2, icam1, dlec1, cfap54, vcam1, fhad1, dynlt5, cfap43, rsph1, dnai3, ncol = 3, nrow = 4,align = c("hv"), common.legend = TRUE, legend="bottom")
```

# Gene Ontology enrichment analysis (Top pathway) {.tabset}

## Biological Process {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 8, fig.height = 7}
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))

listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,goBP,unique(res.T$symbol))

##top10 pathway BP
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr) 
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio) 
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
topAP <- topPath(fishP ,"specificAP", 0.5, 14)
tabAP1 <- tabAP[match(topAP, rownames(tabAP)),]
rownames(tabAP1) <- gsub("GOBP_", "",rownames(tabAP1)) 
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1)) 
rownames(tabAP) <- gsub("GOBP_", "",rownames(tabAP)) 
rownames(tabAP) <- gsub("_", " ",rownames(tabAP)) 

tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio) 
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
topBP <- topPath(fishP ,"specificBP", 0.5, 14)
tabBP1 <- tabBP[match(topBP, rownames(tabBP)),]
rownames(tabBP1) <- gsub("GOBP_", "",rownames(tabBP1)) 
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1)) 
rownames(tabBP) <- gsub("GOBP_", "",rownames(tabBP)) 
rownames(tabBP) <- gsub("_", " ",rownames(tabBP)) 

tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio) 
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 14)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("GOBP_", "",rownames(tabCP1)) 
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1)) 
rownames(tabCP) <- gsub("GOBP_", "",rownames(tabCP)) 
rownames(tabCP) <- gsub("_", " ",rownames(tabCP)) 
```


### Top 10 pathway CWM vs NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO Biological Process 2021") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```


### Top 10 pathway NAWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO Biological Process 2021") + coord_flip()+ theme_bw()+ theme(axis.text = element_text(face="bold"))
```


### Top 10 pathway CWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO Molecular Function 2021") + coord_flip()+theme_bw()+ theme(axis.text = element_text(face="bold"))
```

## Molecular Function {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 8, fig.height = 7}
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))

listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,mf,unique(res.T$symbol))

##top10 pathway MF
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr) 
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio) 
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
topAP <- topPath(fishP ,"specificAP", 0.5, 12)
tabAP1 <- tabAP[match(topAP, rownames(tabAP)),]
rownames(tabAP1) <- gsub("GOMF_", "",rownames(tabAP1)) 
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1)) 
rownames(tabAP) <- gsub("GOMF_", "",rownames(tabAP)) 
rownames(tabAP) <- gsub("_", " ",rownames(tabAP)) 

tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio) 
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
tabBP=tabBP[order(tabBP$NOM_pval),]
topBP = subset(tabBP,NOM_pval<=0.05)
topBP$pathway = rownames(topBP)
tabBP1 <- tabBP[match(rownames(topBP), rownames(tabBP)),]
tabBP1 <- tabBP1[-c(8, 9, 14, 15), ]
tabBP1 <- tabBP1[1:12,]
rownames(tabBP1) <- gsub("GOMF_", "",rownames(tabBP1)) 
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1)) 
rownames(tabBP) <- gsub("GOMF_", "",rownames(tabBP)) 
rownames(tabBP) <- gsub("_", " ",rownames(tabBP)) 

tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio) 
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 12)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("GOMF_", "",rownames(tabCP1)) 
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1)) 
rownames(tabCP) <- gsub("GOMF_", "",rownames(tabCP)) 
rownames(tabCP) <- gsub("_", " ",rownames(tabCP)) 
```


### Top 10 pathway CWM vs NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO Molecular Function 2021") + xlab("") + coord_flip() + theme_bw()+ theme(axis.text = element_text(face="bold"))
```


### Top 10 pathway NAWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO Molecular Function 2021") + coord_flip()+ theme_bw()+ theme(axis.text = element_text(face="bold"))
```


### Top 10 pathway CWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO Molecular Function 2021") + coord_flip()+theme_bw()+ theme(axis.text = element_text(face="bold"))
```

## Transcription factor targets {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 8, fig.height = 7}
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))

listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,tft,unique(res.T$symbol))

##top10 pathway TFT
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr) 
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio) 
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
tabAP=tabAP[order(tabAP$NOM_pval),]
topAP = subset(tabAP,NOM_pval<=0.05)
topAP$pathway = rownames(topAP)
tabAP1 <- tabAP[match(rownames(topAP), rownames(tabAP)),]
tabAP1 <- tabAP1[1:12,]
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1)) 
rownames(tabAP) <- gsub("_", " ",rownames(tabAP)) 

tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio) 
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
tabBP=tabBP[order(tabBP$NOM_pval),]
topBP = subset(tabBP,NOM_pval<=0.05)
topBP$pathway = rownames(topBP)
tabBP1 <- tabBP[match(rownames(topBP), rownames(tabBP)),]
tabBP1 <- tabBP1[1:12,]
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1)) 
rownames(tabBP) <- gsub("_", " ",rownames(tabBP)) 

tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio) 
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 12)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1)) 
rownames(tabCP) <- gsub("_", " ",rownames(tabCP)) 
```


### Top 10 pathway CWM vs NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO Transcription factor targets 2021") + xlab("") + coord_flip() + theme_bw()
```


### Top 10 pathway NAWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO Transcription factor targets 2021") + coord_flip()+ theme_bw()
```


### Top 10 pathway CWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO Transcription factor targets 2021") + coord_flip()+theme_bw()
```

## KEGG pathway database {.tabset}
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 8, fig.height = 7}
##pathway
specificAP = as.character(na.omit(degs.AP[!degs.AP%in%degs.BP & !degs.AP%in%degs.CP]))
specificBP = as.character(na.omit(degs.BP[!degs.BP%in%degs.AP & !degs.BP%in%degs.CP]))
specificCP = as.character(na.omit(degs.CP[!degs.CP%in%degs.AP & !degs.CP%in%degs.BP]))

listP = list(specificAP=specificAP, specificBP=specificBP,specificCP=specificCP)
fishP = fisher_enrichment(listP,kegg,unique(res.T$symbol))

##top10 pathway KEGG
tabAP <- fishP$specificAP
tabAP$fdr <- as.numeric(tabAP$fdr) 
tabAP$OR.odds.ratio <- as.numeric(tabAP$OR.odds.ratio) 
tabAP$NOM_pval <- as.numeric(tabAP$NOM_pval)
tabAP=tabAP[order(tabAP$NOM_pval),]
topAP = subset(tabAP,NOM_pval<=0.17)
topAP$pathway = rownames(topAP)
tabAP1 <- tabAP[match(rownames(topAP), rownames(tabAP)),]
rownames(tabAP1) <- gsub("KEGG", "",rownames(tabAP1)) 
rownames(tabAP1) <- gsub("_", " ",rownames(tabAP1)) 
rownames(tabAP) <- gsub("KEGG", "",rownames(tabAP)) 
rownames(tabAP) <- gsub("_", " ",rownames(tabAP)) 

tabBP <- fishP$specificBP
tabBP$fdr <- as.numeric(tabBP$fdr)
tabBP$OR.odds.ratio <- as.numeric(tabBP$OR.odds.ratio) 
tabBP$NOM_pval <- as.numeric(tabBP$NOM_pval)
tabBP=tabBP[order(tabBP$NOM_pval),]
topBP = subset(tabBP,NOM_pval<=0.3)
topBP$pathway = rownames(topBP)
tabBP1 <- tabBP[match(rownames(topBP), rownames(tabBP)),]
tabBP1 <- tabBP1[1:10,]
rownames(tabBP1) <- gsub("KEGG", "",rownames(tabBP1)) 
rownames(tabBP1) <- gsub("_", " ",rownames(tabBP1)) 
rownames(tabBP) <- gsub("KEGG", "",rownames(tabBP)) 
rownames(tabBP) <- gsub("_", " ",rownames(tabBP)) 

tabCP <- fishP$specificCP
tabCP$fdr <- as.numeric(tabCP$fdr)
tabCP$OR.odds.ratio <- as.numeric(tabCP$OR.odds.ratio) 
tabCP$NOM_pval <- as.numeric(tabCP$NOM_pval)
topCP <- topPath(fishP ,"specificCP", 0.5, 12)
tabCP1 <- tabCP[match(topCP, rownames(tabCP)),]
rownames(tabCP1) <- gsub("KEGG", "",rownames(tabCP1)) 
rownames(tabCP1) <- gsub("_", " ",rownames(tabCP1)) 
rownames(tabCP) <- gsub("KEGG", "",rownames(tabCP)) 
rownames(tabCP) <- gsub("_", " ",rownames(tabCP)) 
```


### Top 10 pathway CWM vs NAWM
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabAP1, aes(x=reorder(rownames(tabAP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13") +theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + labs(title = "CWM vs NAWM", subtitle = "GO KEGG pathway database 2021") + xlab("") + coord_flip() + theme_bw()
```


### Top 10 pathway NAWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabBP1, aes(x=reorder(rownames(tabBP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs(title = "NAWM vs WML", subtitle = "GO KEGG pathway database 2021") + coord_flip()+ theme_bw()
```


### Top 10 pathway CWM vs WML
```{r echo=TRUE, message=FALSE, warning=FALSE, paged.print=FALSE, fig.width = 4, fig.height = 3}
ggplot(tabCP1, aes(x=reorder(rownames(tabCP1),-rank(NOM_pval)), y=-log10(NOM_pval),fill=-log10(NOM_pval))) +geom_col(width=.65)+ scale_fill_gradient(low="#F6BDC0",high="#DC1C13")+theme(axis.text=element_text(size=8)) + scale_x_discrete(label = function(x) stringr::str_wrap(gsub("_", " ",x), width = 30)) + xlab("")+ labs( title = "CWM vs WML", subtitle = "GO KEGG pathway database 2021") + coord_flip()+theme_bw()
```





